def test_subset_minmax(self):
    """Check ``gpuarray.subset_min`` against ``np.min`` over an index subset.

    For every dtype under test a random device array of ``l_a`` entries is
    generated.  The minimum over a fixed subset of its indices is then
    computed on the host (fancy indexing + ``np.min``) and on the device
    (``gpuarray.subset_min``); both results must compare equal.

    The subset keeps index 0 and every index that is not a multiple of
    ``gran``.
    """
    from pycuda.curandom import rand as curand

    l_a = 200000
    gran = 5
    l_m = l_a - l_a // gran + 1

    if has_double_support():
        dtypes = [np.float64, np.float32, np.int32]
    else:
        dtypes = [np.float32, np.int32]

    # Build the index subset once, vectorised on the host: it does not
    # depend on the dtype being tested.
    candidates = np.arange(l_a, dtype=np.int32)
    keep = candidates % gran != 0
    keep[0] = True  # index 0 is part of the subset although 0 % gran == 0
    meaningful_indices = candidates[keep]
    # Guard against the subset drifting away from the expected length.
    assert meaningful_indices.size == l_m

    meaningful_indices_gpu = gpuarray.to_gpu(meaningful_indices)

    for dtype in dtypes:
        a_gpu = curand((l_a,), dtype)
        a = a_gpu.get()

        min_a = np.min(a[meaningful_indices])
        min_a_gpu = gpuarray.subset_min(meaningful_indices_gpu, a_gpu).get()

        assert min_a_gpu == min_a
def test_curand_wrappers(self):
    """Smoke-test the XORWOW cuRAND generator wrapper.

    The test is skipped when ``get_curand_version()`` returns ``None``.
    Otherwise normal and uniform samples are drawn for every supported
    floating-point dtype, the uniform samples are checked to lie within
    ``[-1, 1]``, and finally a ``uint32`` uniform draw is requested.
    """
    from pycuda.curandom import get_curand_version

    if get_curand_version() is None:
        from pytest import skip

        skip("curand not installed")

    from pycuda.curandom import (
        XORWOWRandomNumberGenerator,
        Sobol32RandomNumberGenerator,
    )

    float_dtypes = [np.float32]
    if has_double_support():
        float_dtypes.append(np.float64)

    # Sobol32RandomNumberGenerator is imported above but currently left
    # out of the list of generators that get exercised.
    generator_classes = (XORWOWRandomNumberGenerator,)

    for generator_class in generator_classes:
        rng = generator_class()

        for float_dtype in float_dtypes:
            rng.gen_normal(10000, float_dtype)
            # odd sample count: test non-Box-Muller version, if available
            rng.gen_normal(10001, float_dtype)

            uniform_host = rng.gen_uniform(10000, float_dtype).get()
            assert (uniform_host >= -1).all()
            assert (uniform_host <= 1).all()

        rng.gen_uniform(10000, np.uint32)
def do_generate(out_type):
    """Decide whether code for ``out_type`` should be generated.

    An ``out_type`` containing ``"double"`` is gated on
    ``has_double_support()``; one containing ``"2"`` is additionally gated
    on ``self.has_box_muller`` (``self`` comes from the enclosing scope).
    With neither substring present the answer is ``True``.
    """
    verdict = True

    if "double" in out_type:
        verdict = has_double_support()

    # Only consult the Box-Muller flag while the verdict is still truthy,
    # mirroring the short-circuit of a chained ``and``.
    if "2" in out_type and verdict:
        verdict = self.has_box_muller

    return verdict
def test_curand_wrappers(self):
    """Smoke-test every cuRAND generator wrapper the installed version offers.

    The test is skipped when ``get_curand_version()`` returns ``None``.
    Generator classes are collected according to version thresholds
    (3.2.0, 4.0.0, 4.1.0).  Each generator draws normal, log-normal
    (cuRAND >= 4.0.0) and uniform samples for every supported
    floating-point dtype; uniform samples must lie within ``[-1, 1]``.
    Afterwards a ``uint32`` uniform draw and, for cuRAND >= 5.0.0, a
    Poisson draw are requested.
    """
    from pycuda.curandom import get_curand_version

    # Query the version once instead of on every loop iteration; it is
    # assumed not to change while the test runs.
    curand_version = get_curand_version()
    if curand_version is None:
        from pytest import skip

        skip("curand not installed")

    generator_types = []
    if curand_version >= (3, 2, 0):
        from pycuda.curandom import (
            XORWOWRandomNumberGenerator,
            Sobol32RandomNumberGenerator,
        )

        generator_types.extend(
            [XORWOWRandomNumberGenerator, Sobol32RandomNumberGenerator]
        )

    if curand_version >= (4, 0, 0):
        from pycuda.curandom import (
            ScrambledSobol32RandomNumberGenerator,
            Sobol64RandomNumberGenerator,
            ScrambledSobol64RandomNumberGenerator,
        )

        generator_types.extend(
            [
                ScrambledSobol32RandomNumberGenerator,
                Sobol64RandomNumberGenerator,
                ScrambledSobol64RandomNumberGenerator,
            ]
        )

    if curand_version >= (4, 1, 0):
        from pycuda.curandom import MRG32k3aRandomNumberGenerator

        generator_types.extend([MRG32k3aRandomNumberGenerator])

    if has_double_support():
        dtypes = [np.float32, np.float64]
    else:
        dtypes = [np.float32]

    # Feature gates used inside the loops below.
    has_log_normal = curand_version >= (4, 0, 0)
    has_poisson = curand_version >= (5, 0, 0)

    for gen_type in generator_types:
        gen = gen_type()

        for dtype in dtypes:
            gen.gen_normal(10000, dtype)
            # test non-Box-Muller version, if available
            gen.gen_normal(10001, dtype)

            if has_log_normal:
                gen.gen_log_normal(10000, dtype, 10.0, 3.0)
                # test non-Box-Muller version, if available
                gen.gen_log_normal(10001, dtype, 10.0, 3.0)

            x_host = gen.gen_uniform(10000, dtype).get()
            assert (-1 <= x_host).all()
            assert (x_host <= 1).all()

        gen.gen_uniform(10000, np.uint32)
        if has_poisson:
            gen.gen_poisson(10000, np.uint32, 13.0)
def test_random(self):
    """Check that ``pycuda.curandom.rand`` yields values in ``[0, 1)``.

    A ``(10, 100)`` array is drawn for ``float32`` and, when
    ``has_double_support()`` is true, for ``float64`` as well.
    """
    from pycuda.curandom import rand as curand

    float_dtypes = [np.float32]
    if has_double_support():
        float_dtypes.append(np.float64)

    for float_dtype in float_dtypes:
        samples = curand((10, 100), dtype=float_dtype).get()
        assert (samples >= 0).all()
        assert (samples < 1).all()
def test_minmax(self):
    """Compare ``gpuarray.min`` / ``gpuarray.max`` with their NumPy namesakes.

    For each reduction and each dtype under test, a random device array
    of 200000 entries is reduced on the host and on the device; the two
    results must compare equal.
    """
    from pycuda.curandom import rand as curand

    dtypes = [np.float32, np.int32]
    if has_double_support():
        dtypes = [np.float64] + dtypes

    for what in ("min", "max"):
        # Look the same-named reduction up in both namespaces.
        host_reduce = getattr(np, what)
        device_reduce = getattr(gpuarray, what)

        for dtype in dtypes:
            a_gpu = curand((200000,), dtype)
            expected = host_reduce(a_gpu.get())
            actual = device_reduce(a_gpu).get()

            assert actual == expected, (actual, expected, dtype, what)
def test_curand_wrappers(self):
    """Smoke-test every cuRAND generator wrapper the installed version offers.

    The test is skipped when ``get_curand_version()`` returns ``None``.
    Generator classes are collected according to version thresholds
    (3.2.0 and 4.0.0).  Each generator draws normal, log-normal
    (cuRAND >= 4.0.0) and uniform samples for every supported
    floating-point dtype; uniform samples must lie within ``[-1, 1]``.
    A ``uint32`` uniform draw is requested last.
    """
    from pycuda.curandom import get_curand_version

    # Query the version once instead of on every loop iteration; it is
    # assumed not to change while the test runs.
    curand_version = get_curand_version()
    if curand_version is None:
        from pytest import skip

        skip("curand not installed")

    generator_types = []
    if curand_version >= (3, 2, 0):
        from pycuda.curandom import (
            XORWOWRandomNumberGenerator,
            Sobol32RandomNumberGenerator,
        )

        generator_types.extend(
            [XORWOWRandomNumberGenerator, Sobol32RandomNumberGenerator]
        )

    # The same threshold also gates the log-normal draws further down.
    has_log_normal = curand_version >= (4, 0, 0)
    if has_log_normal:
        from pycuda.curandom import (
            ScrambledSobol32RandomNumberGenerator,
            Sobol64RandomNumberGenerator,
            ScrambledSobol64RandomNumberGenerator,
        )

        generator_types.extend(
            [
                ScrambledSobol32RandomNumberGenerator,
                Sobol64RandomNumberGenerator,
                ScrambledSobol64RandomNumberGenerator,
            ]
        )

    if has_double_support():
        dtypes = [np.float32, np.float64]
    else:
        dtypes = [np.float32]

    for gen_type in generator_types:
        gen = gen_type()

        for dtype in dtypes:
            gen.gen_normal(10000, dtype)
            # test non-Box-Muller version, if available
            gen.gen_normal(10001, dtype)

            if has_log_normal:
                gen.gen_log_normal(10000, dtype, 10.0, 3.0)
                # test non-Box-Muller version, if available
                gen.gen_log_normal(10001, dtype, 10.0, 3.0)

            x_host = gen.gen_uniform(10000, dtype).get()
            assert (-1 <= x_host).all()
            assert (x_host <= 1).all()

        gen.gen_uniform(10000, np.uint32)
def test_minmax(self):
    """Verify device-side ``min`` and ``max`` reductions against NumPy.

    Every (reduction, dtype) combination draws a fresh random device
    array of 200000 entries and requires ``gpuarray.<what>`` to equal
    ``np.<what>`` applied to the host copy.
    """
    from pycuda.curandom import rand as curand

    if has_double_support():
        dtypes = [np.float64, np.float32, np.int32]
    else:
        dtypes = [np.float32, np.int32]

    # Flattened iteration order: all dtypes for "min", then all for "max".
    cases = [(what, dtype) for what in ["min", "max"] for dtype in dtypes]

    for what, dtype in cases:
        device_data = curand((200000,), dtype)
        host_data = device_data.get()

        host_result = getattr(np, what)(host_data)
        device_result = getattr(gpuarray, what)(device_data).get()

        assert device_result == host_result, (
            device_result,
            host_result,
            dtype,
            what,
        )
def test_complex_bits(self):
    """Check ``real``, ``imag`` and ``conj`` of complex device arrays.

    For each complex dtype under test a random complex vector is built
    and its ``real``/``imag``/``conj()`` results are compared with the
    NumPy equivalents on the host copy.  ``conj(out=...)`` must return
    the array passed as ``out``, and the views must preserve C or F
    contiguity for both a zero-filled real array and the reshaped
    complex array.
    """
    from pycuda.curandom import rand as curand
    from pytools import match_precision

    complex_types = [np.complex64]
    if has_double_support():
        complex_types.append(np.complex128)

    n = 20
    # Name of the flag that must be set for each memory order.
    flag_for_order = {"C": "c_contiguous", "F": "f_contiguous"}

    for tp in complex_types:
        dtype = np.dtype(tp)
        real_dtype = match_precision(np.dtype(np.float64), dtype)

        re_part = curand((n,), real_dtype).astype(dtype)
        im_part = curand((n,), real_dtype).astype(dtype)
        z = re_part + 1j * im_part

        assert la.norm(z.get().real - z.real.get()) == 0
        assert la.norm(z.get().imag - z.imag.get()) == 0
        assert la.norm(z.get().conj() - z.conj().get()) == 0

        # verify conj with out parameter
        conj_target = z.astype(np.complex64)
        assert conj_target is z.conj(out=conj_target)
        assert la.norm(z.get().conj() - conj_target.get()) < 1e-7

        # verify contiguity is preserved
        for order in ("C", "F"):
            flag_name = flag_for_order[order]

            # test both zero and non-zero value code paths
            zero_real = gpuarray.zeros(z.shape, dtype=real_dtype, order=order)
            reshaped = z.reshape(z.shape, order=order)

            for zdata in (zero_real, reshaped):
                assert getattr(zdata.flags, flag_name)
                assert getattr(zdata.real.flags, flag_name)
                assert getattr(zdata.imag.flags, flag_name)
                assert getattr(zdata.conj().flags, flag_name)
def test_complex_bits(self):
    """Check ``real``, ``imag`` and ``conj`` of complex device arrays.

    For each complex dtype under test a random complex vector of 20
    entries is built on the device; its ``real``, ``imag`` and
    ``conj()`` results must match the NumPy equivalents exactly.
    """
    from pycuda.curandom import rand as curand
    from pytools import match_precision

    complex_types = [np.complex64]
    if has_double_support():
        complex_types.append(np.complex128)

    n = 20

    for tp in complex_types:
        dtype = np.dtype(tp)
        # Real dtype handed to the random generator for this complex type.
        real_dtype = match_precision(np.dtype(np.float64), dtype)

        re_part = curand((n,), real_dtype).astype(dtype)
        im_part = curand((n,), real_dtype).astype(dtype)
        z = re_part + 1j * im_part

        assert la.norm(z.get().real - z.real.get()) == 0
        assert la.norm(z.get().imag - z.imag.get()) == 0
        assert la.norm(z.get().conj() - z.conj().get()) == 0
def test_complex_bits(self):
    """Compare complex-array accessors on the device with NumPy.

    A random complex vector is assembled from two real random vectors
    for ``complex64`` and, with double support, ``complex128``.  The
    device-side ``real``, ``imag`` and ``conj()`` must agree exactly
    with the same operations applied to the host copy.
    """
    from pycuda.curandom import rand as curand
    from pytools import match_precision

    if has_double_support():
        complex_types = (np.complex64, np.complex128)
    else:
        complex_types = (np.complex64,)

    length = 20

    for complex_type in complex_types:
        complex_dtype = np.dtype(complex_type)
        real_dtype = match_precision(np.dtype(np.float64), complex_dtype)

        real_half = curand((length,), real_dtype).astype(complex_dtype)
        imag_half = curand((length,), real_dtype).astype(complex_dtype)
        z = real_half + 1j * imag_half

        real_error = la.norm(z.get().real - z.real.get())
        assert real_error == 0

        imag_error = la.norm(z.get().imag - z.imag.get())
        assert imag_error == 0

        conj_error = la.norm(z.get().conj() - z.conj().get())
        assert conj_error == 0
def test_astype(self):
    """Check ``GPUArray.astype`` between ``float32`` and ``float64``.

    Widening ``float32 -> float64`` on the device must match the host
    conversion exactly; narrowing ``float64 -> float32`` must match to a
    relative error below ``1e-7``.  Both directions need double
    precision, so the test is skipped when ``has_double_support()`` is
    false.
    """
    from pycuda.curandom import rand as curand

    if not has_double_support():
        # Report the missing coverage as a skip instead of letting the
        # test pass without having checked anything.
        from pytest import skip

        skip("double precision not supported")

    # float32 -> float64
    a_gpu = curand((2000,), dtype=np.float32)
    a = a_gpu.get().astype(np.float64)
    a2 = a_gpu.astype(np.float64).get()

    assert a2.dtype == np.float64
    assert la.norm(a - a2) == 0, (a, a2)

    # float64 -> float32
    a_gpu = curand((2000,), dtype=np.float64)
    a = a_gpu.get().astype(np.float32)
    a2 = a_gpu.astype(np.float32).get()

    assert a2.dtype == np.float32
    assert la.norm(a - a2) / la.norm(a) < 1e-7
def test_astype(self):
    """Compare device-side ``astype`` with NumPy's for float32/float64.

    The widening conversion (``float32`` to ``float64``) is required to
    be bit-exact against the host result; the narrowing conversion
    (``float64`` to ``float32``) is allowed a relative error below
    ``1e-7``.  Without double support the test is reported as skipped.
    """
    from pycuda.curandom import rand as curand

    if not has_double_support():
        # A bare ``return`` here would count as a pass although nothing
        # was tested; skip explicitly so the gap shows up in the report.
        from pytest import skip

        skip("double precision not supported")

    # Widening: float32 on the device, float64 after conversion.
    single_gpu = curand((2000,), dtype=np.float32)
    widened_host = single_gpu.get().astype(np.float64)
    widened_device = single_gpu.astype(np.float64).get()

    assert widened_device.dtype == np.float64
    assert la.norm(widened_host - widened_device) == 0, (
        widened_host,
        widened_device,
    )

    # Narrowing: float64 on the device, float32 after conversion.
    double_gpu = curand((2000,), dtype=np.float64)
    narrowed_host = double_gpu.get().astype(np.float32)
    narrowed_device = double_gpu.astype(np.float32).get()

    assert narrowed_device.dtype == np.float32
    assert (
        la.norm(narrowed_host - narrowed_device) / la.norm(narrowed_host)
        < 1e-7
    )
def test_complex_bits(self):
    """Check ``real``, ``imag`` and ``conj`` of complex device arrays.

    For each complex dtype under test a random complex vector is built
    and its ``real``/``imag``/``conj()`` results are compared with the
    NumPy equivalents on the host copy.  The same accessors must also
    preserve C or F contiguity, both for a zero-filled real array and
    for the reshaped complex array.
    """
    from pycuda.curandom import rand as curand
    # Imported once up front rather than on every loop iteration.
    from pytools import match_precision

    if has_double_support():
        dtypes = [np.complex64, np.complex128]
    else:
        dtypes = [np.complex64]

    n = 20
    # Name of the flag that must be set for each memory order.
    flag_for_order = {"C": "c_contiguous", "F": "f_contiguous"}

    for tp in dtypes:
        dtype = np.dtype(tp)
        real_dtype = match_precision(np.dtype(np.float64), dtype)

        z = (curand((n,), real_dtype).astype(dtype)
                + 1j*curand((n,), real_dtype).astype(dtype))

        assert la.norm(z.get().real - z.real.get()) == 0
        assert la.norm(z.get().imag - z.imag.get()) == 0
        assert la.norm(z.get().conj() - z.conj().get()) == 0

        # verify contiguity is preserved
        for order in ["C", "F"]:
            flag_name = flag_for_order[order]

            # test both zero and non-zero value code paths
            z_real = gpuarray.zeros(z.shape, dtype=real_dtype, order=order)
            z2 = z.reshape(z.shape, order=order)

            for zdata in [z_real, z2]:
                # Plain truthiness asserts; no comparison against True.
                assert getattr(zdata.flags, flag_name)
                assert getattr(zdata.real.flags, flag_name)
                assert getattr(zdata.imag.flags, flag_name)
                assert getattr(zdata.conj().flags, flag_name)
def _get_common_dtype(obj1, obj2):
    """Return ``_get_common_dtype_base`` for ``obj1`` and ``obj2``.

    The result of ``has_double_support()`` is forwarded as the third
    positional argument.
    """
    double_supported = has_double_support()
    return _get_common_dtype_base(obj1, obj2, double_supported)
def test_curand_wrappers(self):
    """Smoke-test every cuRAND generator wrapper the installed version offers.

    The test is skipped when ``get_curand_version()`` returns ``None``.
    Generator classes are collected according to version thresholds
    (3.2.0, 4.0.0, 4.1.0).  Each generator draws normal, log-normal
    (cuRAND >= 4.0.0) and uniform samples for every supported
    floating-point dtype; uniform samples must lie within ``[-1, 1]``.
    Afterwards a ``uint32`` uniform draw is requested and, for
    cuRAND >= 5.0.0, the Poisson entry points ``gen_poisson`` and
    ``fill_poisson`` are exercised.
    """
    from pycuda.curandom import get_curand_version

    # Query the version once instead of on every loop iteration; it is
    # assumed not to change while the test runs.
    curand_version = get_curand_version()
    if curand_version is None:
        from pytest import skip

        skip("curand not installed")

    generator_types = []
    if curand_version >= (3, 2, 0):
        from pycuda.curandom import (
            XORWOWRandomNumberGenerator,
            Sobol32RandomNumberGenerator,
        )

        generator_types.extend(
            [XORWOWRandomNumberGenerator, Sobol32RandomNumberGenerator])

    if curand_version >= (4, 0, 0):
        from pycuda.curandom import (
            ScrambledSobol32RandomNumberGenerator,
            Sobol64RandomNumberGenerator,
            ScrambledSobol64RandomNumberGenerator,
        )

        generator_types.extend([
            ScrambledSobol32RandomNumberGenerator,
            Sobol64RandomNumberGenerator,
            ScrambledSobol64RandomNumberGenerator,
        ])

    if curand_version >= (4, 1, 0):
        from pycuda.curandom import MRG32k3aRandomNumberGenerator

        generator_types.extend([MRG32k3aRandomNumberGenerator])

    if has_double_support():
        dtypes = [np.float32, np.float64]
    else:
        dtypes = [np.float32]

    # Feature gates used inside the loops below.
    has_log_normal = curand_version >= (4, 0, 0)
    has_poisson = curand_version >= (5, 0, 0)

    # Value every element is prefilled with before ``fill_poisson``.
    v = 10

    for gen_type in generator_types:
        gen = gen_type()

        for dtype in dtypes:
            gen.gen_normal(10000, dtype)
            # test non-Box-Muller version, if available
            gen.gen_normal(10001, dtype)

            if has_log_normal:
                gen.gen_log_normal(10000, dtype, 10.0, 3.0)
                # test non-Box-Muller version, if available
                gen.gen_log_normal(10001, dtype, 10.0, 3.0)

            x_host = gen.gen_uniform(10000, dtype).get()
            assert (-1 <= x_host).all()
            assert (x_host <= 1).all()

        gen.gen_uniform(10000, np.uint32)
        if has_poisson:
            gen.gen_poisson(10000, np.uint32, 13.0)

            for fill_dtype in dtypes + [np.uint32]:
                a = gpuarray.empty(1000000, dtype=fill_dtype)
                a.fill(v)
                # NOTE(review): presumably the prefilled values act as the
                # Poisson rate and are overwritten in place -- confirm
                # against the pycuda.curandom documentation.
                gen.fill_poisson(a)
                # Fraction of entries equal to v - 1.  It is computed
                # (which copies the result back to the host) but is not
                # asserted on.
                tmp = (a.get() == (v - 1)).sum() / a.size  # noqa: F841