def test_load_ptx(self):
    """Load a generated PTX file as a ``RawModule`` and run ``test_div``.

    The kernel output ``y`` is expected to be close to ``x1 / (x2 + 1.0)``.
    """
    # Write the PTX file into the temp dir.
    ptx_path = self._generate_file('ptx')

    # Build the module from that file and look up the kernel under test.
    module = cupy.RawModule(path=ptx_path, backend=self.backend)
    kernel = module.get_function('test_div')

    x1, x2, y = self._helper(kernel, cupy.float32)
    expected = x1 / (x2 + 1.0)
    assert cupy.allclose(y, expected)
def test_context_switch_RawKernel(self):
    """Run the basic kernel check again after switching to device 1."""
    # A RawKernel has to be launched once to force compilation, so do a
    # throw-away launch before changing the current device.
    self._helper(self.kern, cupy.float32)

    cupy.cuda.runtime.setDevice(1)
    x1, x2, y = self._helper(self.kern, cupy.float32)
    expected = x1 + x2
    assert cupy.allclose(y, expected)
def general_array_equality(arr1, arr2):
    """Compare two arrays, supporting both HOOMDArrays and HOOMDGPUArrays.

    When neither array has a floating-point dtype the flattened arrays are
    compared element by element for exact equality.  Otherwise a tolerance
    based comparison is used: ``cupy.allclose`` if either argument is a
    ``HOOMDGPUArray``, ``np.allclose`` if not.
    """
    pair = (arr1, arr2)

    # Exact comparison is only used when no floating-point data is involved.
    if not any(np.issubdtype(a.dtype, np.floating) for a in pair):
        return all(arr1.ravel() == arr2.ravel())

    # Floating-point data: choose the allclose implementation by array kind.
    if any(isinstance(a, HOOMDGPUArray) for a in pair):
        return cupy.allclose(arr1, arr2)
    return np.allclose(arr1, arr2)
def test_transform_resnet18(self):
    """Check that wrapping a ResNet-18 in ``AdaLossScaled`` keeps results.

    The wrapped network must expose the same number of named parameters
    with matching values, produce a matching forward output, and run a
    backward pass without raising.
    """
    cp.random.seed(0)
    cp.cuda.Device(0).use()

    with chainer.using_config('dtype', 'float16'):
        cfg = {
            'loss_scale_method': 'fixed',
            'fixed_loss_scale': 1.,
        }

        # Reference network and its forward pass.
        net1 = resnet18(n_class=10)
        net1.to_device(0)
        x_data = cp.random.normal(size=(2, 3, 224, 224)).astype('float16')
        x = chainer.Variable(x_data)
        y1 = net1(x)
        net1_params = list(net1.namedparams())

        # Wrapped network built from the reference one.
        transforms = [
            AdaLossTransformLinear(),
            AdaLossTransformBasicBlock(),
            AdaLossTransformConv2DBNActiv(),
        ]
        net2 = AdaLossScaled(net1,
                             init_scale=1.,
                             transforms=transforms,
                             cfg=cfg,
                             verbose=True)
        net2.to_device(0)
        y2 = net2(x)
        net2_params = list(net2.namedparams())

        # Parameters are compared pairwise, in iteration order.
        self.assertEqual(len(net1_params), len(net2_params))
        for reference, wrapped in zip(net1_params, net2_params):
            self.assertTrue(
                cp.allclose(reference[1].array, wrapped[1].array))
        self.assertTrue(cp.allclose(y1.array, y2.array))

        # Should not raise error
        y_data = cp.random.normal(size=(2, 10)).astype('float16')
        y2.grad = y_data
        y2.backward()
def test_cuFloatComplex(self):
    """Run the single-precision complex kernels from ``_test_cuComplex``.

    Each kernel is launched on random ``complex64`` inputs and its output
    is compared with the corresponding CuPy expression.
    """
    N = 100
    block = 32
    grid = (N + block - 1) // block
    dtype = cupy.complex64

    mod = cupy.RawModule(code=_test_cuComplex, translate_cucomplex=True)

    def random_complex():
        # The real part is drawn before the imaginary part.
        real = cupy.random.random((N,))
        imag = cupy.random.random((N,))
        return (real + 1j*imag).astype(dtype)

    def launch(name, args):
        mod.get_function(name)((grid,), (block,), args)

    a = random_complex()
    b = random_complex()
    c = random_complex()

    out = cupy.zeros((N,), dtype=dtype)
    out_float = cupy.zeros((N,), dtype=cupy.float32)
    out_up = cupy.zeros((N,), dtype=cupy.complex128)

    # Binary kernels share one signature; the expected value is evaluated
    # only after the launch.
    binary_cases = (
        ('test_addf', lambda: a + b),
        ('test_subf', lambda: a - b),
        ('test_mulf', lambda: a * b),
        ('test_divf', lambda: a / b),
    )
    for kernel_name, expected in binary_cases:
        launch(kernel_name, (a, b, out))
        assert (out == expected()).all()

    launch('test_conjf', (a, out))
    assert (out == cupy.conj(a)).all()

    launch('test_absf', (a, out_float))
    assert (out_float == cupy.abs(a)).all()

    launch('test_fmaf', (a, b, c, out))
    assert (out == a * b + c).all()

    launch('test_makef', (out,))
    # because of precision issue, the (A==B).all() semantics would fail
    assert cupy.allclose(out, 1.8 - 1j * 8.7)

    launch('test_upcast', (a, out_up))
    assert (out_up == a.astype(cupy.complex128)).all()
def _helper2(self, type_str):
    """Compile ``shift<type_str>`` from ``std_code`` and check its result.

    The kernel is launched on random ``int32`` data and the array is then
    expected to be close to the original values plus 100.
    """
    kernel_name = 'shift<%s>' % type_str
    mod2 = cupy.RawModule(code=std_code,
                          jitify=self.jitify,
                          name_expressions=[kernel_name],
                          options=('--std=c++11',))
    ker = mod2.get_function(kernel_name)

    N = 256
    a = cupy.random.random_integers(0, 7, N).astype(cupy.int32)
    # Keep a copy of the input to compare against after the launch.
    b = a.copy()
    ker((1,), (N,), (a, N))
    assert cupy.allclose(a, b+100)
def _helper(self):
    """Launch the ``test_sum`` kernel and check that ``y`` is ``x1 + x2``.

    The kernel comes from ``self.ker`` when ``self.raw == 'ker'``, and from
    ``self.mod`` otherwise.
    """
    N = 10
    shape = (N, N)
    x1 = cupy.arange(N**2, dtype=cupy.float32).reshape(N, N)
    x2 = cupy.ones(shape, dtype=cupy.float32)
    y = cupy.zeros(shape, dtype=cupy.float32)

    use_kernel = self.raw == 'ker'
    ker = self.ker if use_kernel else self.mod.get_function('test_sum')

    ker((N,), (N,), (x1, x2, y, N**2))
    assert cupy.allclose(x1 + x2, y)
def test_compile_kernel(self):
    """Compile a ``RawKernel`` explicitly and check its log and result.

    The compile log must contain ``'warning'`` and the kernel, built with
    ``-DOP=+``, must produce a ``y`` close to ``x1 + x2``.
    """
    kern = cupy.RawKernel(_test_compile_src,
                          'test_op',
                          options=('-DOP=+',),
                          backend=self.backend)

    # Compile inside a temporary cache dir and capture the log output.
    log = io.StringIO()
    with use_temporary_cache_dir():
        kern.compile(log_stream=log)
    captured = log.getvalue()
    assert 'warning' in captured

    x1, x2, y = self._helper(kern, cupy.float32)
    assert cupy.allclose(y, x1 + x2)
def test_compile_module(self):
    """Compile a ``RawModule`` explicitly and check its log and result.

    The compile log must contain ``'warning'`` and the ``test_op`` kernel,
    built with ``-DOP=+``, must produce a ``y`` close to ``x1 + x2``.
    """
    module = cupy.RawModule(code=_test_compile_src,
                            backend=self.backend,
                            options=('-DOP=+', ))

    # Compile inside a temporary cache dir and capture the log output.
    log = io.StringIO()
    with use_temporary_cache_dir():
        module.compile(log_stream=log)
    captured = log.getvalue()
    assert 'warning' in captured

    kern = module.get_function('test_op')
    x1, x2, y = self._helper(kern, cupy.float32)
    assert cupy.allclose(y, x1 + x2)
def test_context_switch_RawModule4(self):
    """Run the cubin loading check with the kernel launched on device 1."""
    # Generate the cubin in the temp dir and load it while device 0 is
    # still the one in use.
    cubin_path = self._generate_file('cubin')
    module = cupy.RawModule(path=cubin_path, backend=self.backend)
    kernel = module.get_function('test_div')

    # In this test, reloading happens at kernel launch.
    cupy.cuda.runtime.setDevice(1)
    x1, x2, y = self._helper(kernel, cupy.float32)
    expected = x1 / (x2 + 1.0)
    assert cupy.allclose(y, expected)
def test_distance(self):
    """Check ``leverage_for_target_vol`` against a direct computation.

    For every sample and month the expected leverage is
    ``target_vol / (max(sd_long, sd_short) * sqrt(252))``, where the two
    standard deviations are taken over the ``long_window`` and
    ``short_window`` values that end at the month start position.
    """
    total_samples = 2
    long_window = 59
    short_window = 19
    target_vol = 0.05

    log_return = self.df
    first_sample = log_return['sample_id'].min().item()
    in_first_sample = first_sample == log_return['sample_id']
    all_dates = log_return[in_first_sample]['date'].reset_index(drop=True)
    months_start = _get_month_start_pos(all_dates)

    # Stop at the first month start that lies more than long_window
    # positions into the series.
    for window in range(len(months_start)):
        if (months_start[window] - long_window) > 0:
            break
    print(window)
    print('offset', months_start[window] - long_window)

    port_return_ma = log_return['portfolio'].values.reshape(
        total_samples, -1)

    number_of_threads = 256
    num_months = len(months_start) - window
    if num_months == 0:
        # this case, use all the data to compute
        num_months = 1
    number_of_blocks = num_months * total_samples

    leverage = cupy.zeros((total_samples, num_months))
    configured_kernel = leverage_for_target_vol[(number_of_blocks, ),
                                                (number_of_threads, ),
                                                0,
                                                256 * MAX_YEARS * 8]
    configured_kernel(leverage, port_return_ma, months_start, num_months,
                      window, long_window, short_window, target_vol)

    def window_sd(values):
        # Square root of the mean squared deviation from the mean.
        center = values.mean()
        return cupy.sqrt(((values - center)**2).mean())

    annualization = math.sqrt(252)
    for sample in range(total_samples):
        for num in range(num_months):
            end_id = months_start[num + window]
            sd_long = window_sd(
                port_return_ma[sample, end_id - long_window:end_id])
            sd_short = window_sd(
                port_return_ma[sample, end_id - short_window:end_id])
            max_sd = max(sd_long, sd_short)
            lev = target_vol / (max_sd * annualization)
            self.assertTrue(cupy.allclose(leverage[sample, num], lev))
def test_multichannel():
    """Check that ``gaussian`` conserves the mean value of each channel.

    Covers ``multichannel=True``, the default ``multichannel`` (which is
    expected to warn), and an iterable ``sigma``.
    """
    a = cp.zeros((5, 5, 3))
    a[1, 1] = cp.arange(1, 4)

    def channel_means(image):
        # One mean per channel along the last axis.
        return [image[..., ch].mean() for ch in range(3)]

    gaussian_rgb_a = gaussian(a, sigma=1, mode='reflect',
                              multichannel=True)
    # The mean value must be conserved in each channel
    # (color channels are not mixed together).
    assert cp.allclose(channel_means(a), channel_means(gaussian_rgb_a))

    # Test multichannel = None
    with expected_warnings(["multichannel"]):
        gaussian_rgb_a = gaussian(a, sigma=1, mode="reflect")
    # Same conservation check for the defaulted argument.
    assert cp.allclose(channel_means(a), channel_means(gaussian_rgb_a))

    # Iterable sigma
    gaussian_rgb_a = gaussian(a, sigma=[1, 2], mode='reflect',
                              multichannel=True)
    assert cp.allclose(channel_means(a), channel_means(gaussian_rgb_a))
def test_log_polar_mapping():
    """Compare ``_log_polar_mapping`` output with tabulated coordinates."""
    # fmt: off
    output_coords = cp.array([
        [0, 0],
        [0, 90],
        [0, 180],
        [0, 270],
        [99, 0],
        [99, 180],
        [99, 270],
        [99, 45],
    ])
    ground_truth = cp.array([
        [101, 100],
        [100, 101],
        [99, 100],
        [100, 99],
        [195.4992586, 100],
        [4.5007414, 100],
        [100, 4.5007414],
        [167.52817336, 167.52817336],
    ])
    # fmt: on

    center = (100, 100)
    k_angle = 360 / (2 * np.pi)
    k_radius = 100 / cp.log(100)

    coords = _log_polar_mapping(output_coords, k_angle, k_radius, center)
    assert cp.allclose(coords, ground_truth)
def test_linear_polar_mapping():
    """Compare ``_linear_polar_mapping`` output with tabulated coordinates."""
    # fmt: off
    output_coords = cp.array([
        [0, 0],
        [0, 90],
        [0, 180],
        [0, 270],
        [99, 0],
        [99, 180],
        [99, 270],
        [99, 45],
    ])
    ground_truth = cp.array([
        [100, 100],
        [100, 100],
        [100, 100],
        [100, 100],
        [199, 100],
        [1, 100],
        [100, 1],
        [170.00357134, 170.00357134],
    ])
    # fmt: on

    center = (100, 100)
    k_angle = 360 / (2 * np.pi)
    k_radius = 1

    coords = _linear_polar_mapping(output_coords, k_angle, k_radius, center)
    assert cp.allclose(coords, ground_truth)
def test_cupy_cufft_inverse_forward():
    """Check that the wrapped cuFFT round trip returns the input array.

    The array is handed over as a raw device pointer together with its
    size and its two dimensions.
    """
    a = cp.array(
        [[3.14, 4.25, 5.36],
         [4, 5, 6],
         [1.23, 4.56, 7.89]],
        dtype=cp.complex128,
    )
    n_rows, n_cols = a.shape
    b = Test_Cupy.test_cupy_cufft_inverse_forward(
        a.data.ptr, a.size, n_rows, n_cols)

    print()
    print("Test 3")
    print(a)
    print(b)

    # An exact array comparison won't work here because there is still a
    # very small difference between the two arrays.
    assert cp.allclose(a, b)
def test_template_specialization(self):
    """Compile the templated kernels by name expression and run each one.

    Every specialization is launched in place on random input, and the
    result is expected to be close to the square of that input.
    """
    if self.backend == 'nvcc':
        self.skipTest('nvcc does not support template specialization')

    is_hip = cupy.cuda.runtime.is_hip

    # TODO(leofang): investigate why hiprtc generates a wrong source code
    # when the same code is compiled and discarded. It seems hiprtc has
    # an internal cache that conflicts with the 2nd compilation attempt.
    if is_hip and hasattr(self, 'clean_up'):
        self.skipTest('skip a potential hiprtc bug')

    # compile code
    # ROCm 5.0 returns HIP_HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID for
    # my_sqrt<complex<double>>, so thrust::complex<double> is used on HIP.
    if is_hip:
        complex_kernel = 'my_sqrt<thrust::complex<double>>'
    else:
        complex_kernel = 'my_sqrt<complex<double>>'
    name_expressions = [
        'my_sqrt<int>',
        'my_sqrt<float>',
        complex_kernel,
        'my_func',
    ]
    mod = cupy.RawModule(code=test_cxx_template,
                         options=('--std=c++11', ),
                         name_expressions=name_expressions,
                         jitify=self.jitify)

    dtypes = (cupy.int32, cupy.float32, cupy.complex128, cupy.float64)
    for ker_T, dtype in zip(name_expressions, dtypes):
        # get specialized kernels
        # TODO(leofang): investigate why getLoweredName has no error
        # but returns an empty string for my_sqrt<complex<double>>
        if is_hip and mod.module.mapping.get(ker_T) == '':
            continue
        ker = mod.get_function(ker_T)

        # prepare inputs & expected outputs
        in_arr = cupy.testing.shaped_random((10, ), dtype=dtype)
        out_arr = in_arr**2

        # run; the result is compared through in_arr afterwards
        ker((1, ), (10, ), (in_arr, 10))

        # check results
        assert cupy.allclose(in_arr, out_arr)
def _helper(self, header, options=()):
    """Compile ``header`` plus ``_test_source1`` with NVRTC and run it.

    Args:
        header: Source text placed in front of ``_test_source1``.
        options: Compiler options forwarded to ``cupy.RawModule``.

    The ``test_sum`` kernel of the resulting module is launched and its
    output ``y`` is expected to be close to ``x1 + x2``.
    """
    code = header + _test_source1
    mod1 = cupy.RawModule(code=code,
                          backend='nvrtc',
                          options=options,
                          jitify=self.jitify)

    N = 10
    shape = (N, N)
    x1 = cupy.arange(N**2, dtype=cupy.float32).reshape(N, N)
    x2 = cupy.ones(shape, dtype=cupy.float32)
    y = cupy.zeros(shape, dtype=cupy.float32)

    ker = mod1.get_function('test_sum')
    ker((N,), (N,), (x1, x2, y, N**2))
    assert cupy.allclose(x1 + x2, y)
def test_cupy_cufft_inverse_forward_with_caster():
    """Check the cuFFT round trip when the input is a ``Cupy_Ref``.

    The array is described by a ``cupy_ref.Cupy_Ref`` built from its device
    pointer, shape, dtype and dtype string.
    """
    a = cp.array(
        [[3.14, 4.25, 5.36],
         [4, 5, 6],
         [1.23, 4.56, 7.89]],
        dtype=cp.complex128,
    )
    b = cupy_ref.Cupy_Ref(ptr=a.data.ptr,
                          shape=a.shape,
                          dtype=a.dtype,
                          typestr=a.dtype.str)
    c = Test_Cupy.test_cupy_cufft_inverse_forward_with_caster(b)

    print()
    print("Test 6")
    print(a)
    print(c)

    # An exact array comparison won't work here because there is still a
    # very small difference between the two arrays.
    assert cp.allclose(a, c)