def test_4d_hypervolumetric_with_coef(self):
    """Compare the SCA result with the naive one when a coefficient is given.

    ``self.coef_array`` selects between a coefficient array whose shape is
    derived from the input shape and a 1-D coefficient holding one value
    per stencil element.
    """
    xin = testing.shaped_arange(self.shape, nlcpy).astype(self.dtype) * 0.1
    n_elem = get_n_stencil_elem(self.stencil_scale)

    if not self.coef_array:
        coef = nlcpy.arange(n_elem) * 0.1
    else:
        # Every axis reported for this stencil type (and present in xin)
        # loses 2 * stencil_scale entries in the coefficient shape.
        trimmed = list(xin.shape)
        for axis in get_axis_numbers_from_strtype(self.type):
            if abs(axis) > xin.ndim:
                continue
            trimmed[axis] -= 2 * self.stencil_scale
        coef = testing.shaped_arange(
            [n_elem] + trimmed, nlcpy, dtype=self.dtype) * 0.01
    coef = coef.astype(dtype=self.dtype)

    if self.dtype == numpy.float32:
        rtol = TOL_SINGLE
    else:
        rtol = TOL_DOUBLE

    sca_options = dict(
        coef=coef,
        is_out=self.is_out,
        prefix=self.prefix,
        optimize=self.optimize,
        change_coef=self.change_coef,
    )
    sca_res, sca_out = compute_with_sca(
        self.type, xin, self.stencil_scale, **sca_options)
    naive_res = compute_with_naive(
        self.type, xin, self.stencil_scale, coef=coef)

    if self.is_out:
        # With an explicit output the returned object must be that output.
        assert sca_res is sca_out
    testing.assert_allclose(sca_res, naive_res, rtol=rtol)
def test_multi_ndarray_5(self, dtype):
    """Combine three 5x5 inputs in one SCA description and check the result.

    The reference is built from shifted slices of the same arrays, written
    into the interior of a zero-filled 5x5 array.
    """
    xin, yin, zin = (
        nlcpy.random.rand(5, 5).astype(dtype=dtype) for _ in range(3)
    )

    # SCA side: four offsets of x plus one offset each of y and z.
    dxin, dyin, dzin = nlcpy.sca.create_descriptor((xin, yin, zin))
    x_terms = dxin[-1, 0] + dxin[0, 1] + dxin[1, 0] + dxin[0, -1]
    desc = x_terms + dyin[-1, -1] + dzin[1, 1]
    res_sca = nlcpy.sca.create_kernel(desc).execute()

    # Reference side: the same offsets expressed as shifted slices.
    x_sum = xin[:-2, 1:-1] + xin[1:-1, 2:] + xin[2:, 1:-1] + xin[1:-1, :-2]
    res_naive = nlcpy.zeros((5, 5), dtype=dtype)
    res_naive[1:-1, 1:-1] = x_sum + yin[:-2, :-2] + zin[2:, 2:]

    if dtype == numpy.float32:
        rtol = TOL_SINGLE
    else:
        rtol = TOL_DOUBLE
    testing.assert_allclose(res_sca, res_naive, rtol=rtol)
def test_irfftn(self):
    """Check that irfftn(rfftn(x)) reproduces x, with and without 'ortho'."""
    x = random((30, 20, 10))
    # An empty kwargs dict reproduces the call that passes no norm at all.
    for norm_kwargs in ({}, {"norm": "ortho"}):
        spectrum = np.fft.rfftn(x, **norm_kwargs)
        restored = np.fft.irfftn(spectrum, **norm_kwargs)
        assert_allclose(x, restored, atol=1e-6)
def test_fft_with_order(dtype, order, fft):
    """Check that ``fft`` gives matching results for two layouts of one array.

    ``fft`` is dispatched on the suffix of its ``__name__``: ``fft`` variants
    run along each single axis; ``fft2``/``fftn`` variants run over axis
    pairs, and ``fftn`` additionally over single axes and ``None``.

    Raises:
        ValueError: if ``fft.__name__`` ends with none of the known suffixes.
    """
    rng = np.random.RandomState(42)
    # NOTE(review): the shape is passed as a single tuple; an earlier form
    # of this line was rng.rand(8, 7, 13). Confirm the tuple call is
    # accepted by the module bound to `np` in this file.
    X = rng.rand((8, 7, 13)).astype(dtype, copy=False)
    # See discussion in pull/14178
    tol = 8.0 * np.sqrt(np.log2(X.size)) * np.finfo(X.dtype).eps

    if order != 'F':
        # Labelled "non contiguous" in the original; both operands drop the
        # last entry along axis 0 (earlier form: X[::-1] with
        # np.ascontiguousarray).
        # NOTE(review): confirm X[:-1] gives the intended memory layout.
        Y = X[:-1]
        X = np.asarray(X[:-1], order='C')
    else:
        # Earlier form: np.asfortranarray(X)
        Y = np.asarray(X, order='F')

    name = fft.__name__
    if name.endswith('fft'):
        call_kwargs = [{'axis': axis} for axis in range(3)]
    elif name.endswith(('fft2', 'fftn')):
        axes = [(0, 1), (1, 2), (0, 2)]
        if name.endswith('fftn'):
            axes.extend([(0, ), (1, ), (2, ), None])
        call_kwargs = [{'axes': ax} for ax in axes]
    else:
        raise ValueError()

    for kwargs in call_kwargs:
        X_res = fft(X, **kwargs)
        Y_res = fft(Y, **kwargs)
        assert_allclose(X_res, Y_res, atol=tol, rtol=tol)
def test_irfft(self):
    """Check that irfft(rfft(x)) reproduces x, with and without 'ortho'."""
    x = random(30)
    # An empty kwargs dict reproduces the call that passes no norm at all.
    for norm_kwargs in ({}, {"norm": "ortho"}):
        spectrum = np.fft.rfft(x, **norm_kwargs)
        restored = np.fft.irfft(spectrum, **norm_kwargs)
        assert_allclose(x, restored, atol=1e-6)
def test_convert_not_contiguous_2(self, dt1, dt2):
    """Convert a ``moveaxis`` view and check values, strides and dtype.

    The converted array must hold the same values as the input, use
    different strides, and carry the requested dtype ``dt2``.
    """
    base = nlcpy.random.rand(3, 4, 3, 4).astype(dt1)
    source = nlcpy.moveaxis(base, 0, 2)

    converted = nlcpy.sca.convert_optimized_array(source, dtype=dt2)

    testing.assert_allclose(source, converted)
    assert converted.strides != source.strides
    assert converted.dtype == dt2
def test_convert_not_contiguous_1(self, dt1, dt2):
    """Convert a step-sliced view and check values, strides and dtype.

    The converted array must hold the same values as the input, use
    different strides, and carry the requested dtype ``dt2``.
    """
    base = nlcpy.random.rand(5, 6, 5, 6).astype(dt1)
    source = base[::2, ::3, ::2, ::3]

    converted = nlcpy.sca.convert_optimized_array(source, dtype=dt2)

    testing.assert_allclose(source, converted)
    assert converted.strides != source.strides
    assert converted.dtype == dt2
def test_ftrace_f(self):
    """Build the Fortran sum kernel with ``ftrace=True`` and check its output.

    Besides the numerical check, a non-empty ``./ftrace.out`` must exist
    after the library has been unloaded; the file is removed afterwards.
    """
    trace_path = './ftrace.out'
    # Remove any leftover trace so the existence check below is meaningful.
    if os.path.exists(trace_path):
        os.remove(trace_path)

    arg_types = (uint64,) * 4
    _helper1(self, _test_ve_adr_f, '/opt/nec/ve/bin/nfort', 'test_sum_',
             _ftypes[self.dtype], arg_types, uint64,
             ext_cflags=('-fpp', ), ftrace=True)

    x1, x2, y = _prep(self.dtype)
    err = self.kern(
        x1.ve_adr, x2.ve_adr, y.ve_adr, y.size,
        sync=self.sync, callback=self.callback,
    )
    testing.assert_allclose(
        y, x1 + x2, err_msg='File-ID: {}'.format(self.lib.id))
    if self.sync:
        assert err == 0

    nlcpy.jit.unload_library(self.lib)

    assert os.path.exists(trace_path)
    assert os.path.getsize(trace_path) > 0
    os.remove(trace_path)
def test_assign_numpy_factor(self, dtype):
    """Multiply an SCA description by a 0-d ``numpy`` array and check it."""
    xin = nlcpy.arange(10).astype(dtype)
    factor = numpy.array(-1, dtype=dtype)

    descriptor = nlcpy.sca.create_descriptor(xin)
    kernel = nlcpy.sca.create_kernel(descriptor[0] * factor)
    res_sca = kernel.execute()

    res_naive = xin * factor
    testing.assert_allclose(res_sca, res_naive)
def test_ifft2(self):
    """Check ifft2 against two chained 1-D ifft calls and the 'ortho' norm."""
    shape = (30, 20)
    real_part = random(shape)
    imag_part = random(shape)
    x = real_part + 1j * imag_part

    # Transforming axis 1 and then axis 0 must agree with ifft2.
    chained = np.fft.ifft(np.fft.ifft(x, axis=1), axis=0)
    assert_allclose(chained, np.fft.ifft2(x), atol=1e-6)

    # The default result times sqrt(30 * 20) must agree with norm="ortho".
    rescaled = np.fft.ifft2(x) * np.sqrt(30 * 20)
    assert_allclose(rescaled, np.fft.ifft2(x, norm="ortho"), atol=1e-6)
def _test_axes(self, op):
    """Check that ``op`` with ``axes=a`` matches ``op`` on the transposed input.

    ``op`` is called as ``op(array)`` and as ``op(array, axes=a)`` for each
    ordering ``a`` of the three axes.
    """
    x = random((30, 20, 10))
    orderings = (
        (0, 1, 2), (0, 2, 1),
        (1, 0, 2), (1, 2, 0),
        (2, 0, 1), (2, 1, 0),
    )
    for perm in orderings:
        transposed_then_op = op(np.transpose(x, perm))
        op_then_transposed = np.transpose(op(x, axes=perm), perm)
        assert_allclose(transposed_then_op, op_then_transposed, atol=1e-6)
def test_identity(self):
    """Check that ifft(fft(v)) returns v for every prefix length 1..511."""
    maxlen = 512
    x = random(maxlen) + 1j * random(maxlen)
    # A real-valued companion array `xr` was dropped here because it was
    # assigned but never used.
    for length in range(1, maxlen):
        prefix = x[0:length]
        assert_allclose(np.fft.ifft(np.fft.fft(prefix)), prefix, atol=1e-12)
def test_hfft(self):
    """Check hfft against fft of the extended signal and the 'ortho' norm."""
    middle = random(14) + 1j * random(14)
    first = random(1)
    last = random(1)
    x_herm = np.concatenate((first, middle, last))

    # Extend with the reversed, conjugated middle part
    # (earlier form: middle[::-1].conj()).
    full = np.concatenate((x_herm, np.conj(middle[::-1])))

    assert_allclose(np.fft.fft(full), np.fft.hfft(x_herm), atol=1e-6)

    rescaled = np.fft.hfft(x_herm) / np.sqrt(30)
    assert_allclose(rescaled, np.fft.hfft(x_herm, norm="ortho"), atol=1e-6)
def test_ifft(self, norm):
    """Check the fft/ifft round trip for ``norm`` and the empty-input error."""
    x = random(30) + 1j * random(30)
    restored = np.fft.ifft(np.fft.fft(x, norm=norm), norm=norm)
    assert_allclose(x, restored, atol=1e-6)

    # Empty input must raise ValueError. Matching the message text
    # ('Invalid number of FFT data points') is disabled here.
    with pytest.raises(ValueError):
        np.fft.ifft([], norm=norm)
def test_autogen_multi_ndarray_2d_same_shape(self, dtype):
    """Add two equally shaped 2-D inputs through ``[...]`` descriptors.

    The kernel result must be a new object (neither input) and equal to
    the plain element-wise sum.
    """
    xin, yin = (
        nlcpy.random.rand(5, 5).astype(dtype=dtype) for _ in range(2)
    )

    dxin, dyin = nlcpy.sca.create_descriptor((xin, yin))
    kernel = nlcpy.sca.create_kernel(dxin[...] + dyin[...])
    res_sca = kernel.execute()

    assert res_sca is not xin
    assert res_sca is not yin

    res_naive = xin + yin
    testing.assert_allclose(res_sca, res_naive)
def test_fftn(self):
    """Check fftn against three chained 1-D fft calls and the 'ortho' norm."""
    shape = (30, 20, 10)
    real_part = random(shape)
    imag_part = random(shape)
    x = real_part + 1j * imag_part

    # Transform axis 2, then 1, then 0; this must agree with fftn.
    chained = np.fft.fft(x, axis=2)
    chained = np.fft.fft(chained, axis=1)
    chained = np.fft.fft(chained, axis=0)
    assert_allclose(chained, np.fft.fftn(x), atol=1e-6)

    # The default result over sqrt(30 * 20 * 10) must agree with "ortho".
    rescaled = np.fft.fftn(x) / np.sqrt(30 * 20 * 10)
    assert_allclose(rescaled, np.fft.fftn(x, norm="ortho"), atol=1e-6)
def test_autogen_multi_ndarray_4d_diff_shape(self, dtype):
    """Add two differently shaped 4-D inputs through ``[...]`` descriptors.

    The reference crops both inputs to the (4, 4, 6, 3) region they share
    before adding them; the kernel result must also be a new object.
    """
    xin = nlcpy.random.rand(5, 4, 6, 7).astype(dtype=dtype)
    yin = nlcpy.random.rand(4, 6, 7, 3).astype(dtype=dtype)

    dxin, dyin = nlcpy.sca.create_descriptor((xin, yin))
    kernel = nlcpy.sca.create_kernel(dxin[...] + dyin[...])
    res_sca = kernel.execute()

    assert res_sca is not xin
    assert res_sca is not yin

    x_cropped = xin[:4, :, :, :3]
    y_cropped = yin[:, :4, :6, :]
    res_naive = x_cropped + y_cropped
    testing.assert_allclose(res_sca, res_naive)
def test_asl_native_f(self):
    """Build and call the Fortran ``test_dbgmsm_`` kernel and check ``ab``.

    The ``[:n, n:n + m]`` block of ``ab`` is compared with
    ``self._make_ref()``; a synchronous call must also return 0.
    """
    arg_types = (uint64, uint64, int64, int64, int64)
    self._helper(_test_asl_native_f, '/opt/nec/ve/bin/nfort',
                 'test_dbgmsm_', arg_types, int64)

    lna, n, m, ab, ipvt = self._prep()
    err = self.kern(
        ab.ve_adr, ipvt.ve_adr, lna, n, m,
        sync=self.sync, callback=self.callback,
    )

    actual = ab[:n, n:n + m]
    expected = self._make_ref()
    testing.assert_allclose(
        actual, expected, rtol=1e-12,
        err_msg='File-ID: {}'.format(self.lib.id))
    if self.sync:
        assert err == 0
def test_ve_array_cpp(self):
    """Build the C++ ``test_sum`` kernel, pass arrays directly, check the sum.

    After the call ``y`` must equal ``x1 + x2``; a synchronous call must
    also return 0.
    """
    arg_types = (void_p,) * 3
    _helper1(self, _test_ve_array_cpp, '/opt/nec/ve/bin/nc++', 'test_sum',
             _cpptypes[self.dtype], arg_types, uint64)

    x1, x2, y = _prep(self.dtype)
    err = self.kern(x1, x2, y, sync=self.sync, callback=self.callback)

    testing.assert_allclose(
        y, x1 + x2, err_msg='File-ID: {}'.format(self.lib.id))
    if self.sync:
        assert err == 0
def test_assign_multiple_coef_for_multiple_description(self, dtype):
    """Sum three descriptions of one input, each scaled by its own 0-d coef."""
    xin = nlcpy.arange(10).astype(dtype)
    dx = nlcpy.sca.create_descriptor(xin)

    coef1, coef2, coef3 = (
        nlcpy.array(value, dtype=dtype) for value in (-1, 2, 3)
    )

    desc = dx[0] * coef1 + dx[0] * coef2 + dx[0] * coef3
    kernel = nlcpy.sca.create_kernel(desc)
    res_sca = kernel.execute()

    res_naive = xin * coef1 + xin * coef2 + xin * coef3
    testing.assert_allclose(res_sca, res_naive)
def test_rfft(self):
    """Check rfft against the leading part of fft and the 'ortho' norm.

    Both checks run for a transform length equal to the input size and for
    twice that size.
    """
    x = random(30)
    for length in (x.size, 2 * x.size):
        kept = length // 2 + 1
        for norm in (None, 'ortho'):
            # rfft must equal the first n // 2 + 1 outputs of the full fft.
            full = np.fft.fft(x, n=length, norm=norm)
            half = np.fft.rfft(x, n=length, norm=norm)
            assert_allclose(full[:kept], half, atol=1e-6)

        rescaled = np.fft.rfft(x, n=length) / np.sqrt(length)
        ortho = np.fft.rfft(x, n=length, norm="ortho")
        assert_allclose(rescaled, ortho, atol=1e-6)
def test_heterosolver_cpp(self):
    """Build and call the C++ ``HS_csr_unsym_ind_0`` kernel and check ``x``.

    ``x`` is passed as a ``veo.OnStack`` argument with ``INTENT_OUT`` and
    compared with ``self._make_ref()``; a synchronous call must return 0.
    """
    self._helper(_test_heterosolver_cpp, '/opt/nec/ve/bin/nc++',
                 'HS_csr_unsym_ind_0', (void_p,), int64)

    x = self._prep()
    stack_arg = veo.OnStack(x, inout=veo.INTENT_OUT)
    err = self.kern(stack_arg, sync=self.sync, callback=self.callback)

    expected = self._make_ref()
    testing.assert_allclose(
        x, expected, rtol=1e-12,
        err_msg='File-ID: {}'.format(self.lib.id))
    if self.sync:
        assert err == 0
def test_sblas_c(self):
    """Build and call the C ``sblas_mv_csr_ind_0`` kernel and check ``y``.

    ``y`` is passed as a ``veo.OnStack`` argument with ``INTENT_OUT`` and
    compared with ``self._make_ref()``; a synchronous call must return 0.
    """
    self._helper(_test_sblas_c, '/opt/nec/ve/bin/ncc',
                 'sblas_mv_csr_ind_0', (void_p, ), int64)

    y = self._prep()
    stack_arg = veo.OnStack(y, inout=veo.INTENT_OUT)
    err = self.kern(stack_arg, sync=self.sync, callback=self.callback)

    expected = self._make_ref()
    testing.assert_allclose(
        y, expected, rtol=1e-12,
        err_msg='File-ID: {}'.format(self.lib.id))
    if self.sync:
        assert err == 0
def test_multi_ndarray_3(self, dtype):
    """Combine a 7x7 and a 6x8 input in one SCA description and check it.

    The reference crops both inputs to the (6, 7) region they share and
    fills only the ``[2:-1, 1:-2]`` window of a zero-filled array.
    """
    xin = nlcpy.random.rand(7, 7).astype(dtype=dtype)
    yin = nlcpy.random.rand(6, 8).astype(dtype=dtype)

    # SCA side.
    dxin, dyin = nlcpy.sca.create_descriptor((xin, yin))
    desc = dxin[-2, 2] + dxin[1, -1] + dyin[0, 0]
    res_sca = nlcpy.sca.create_kernel(desc).execute()

    # Reference side, written with shifted slices of the cropped inputs.
    x_common = xin[:6, :]
    y_common = yin[:, :7]
    res_naive = nlcpy.zeros((6, 7), dtype=dtype)
    res_naive[2:-1, 1:-2] = (
        x_common[:-3, 3:] + x_common[3:, :-3] + y_common[2:-1, 1:-2]
    )

    if dtype == numpy.float32:
        rtol = TOL_SINGLE
    else:
        rtol = TOL_DOUBLE
    testing.assert_allclose(res_sca, res_naive, rtol=rtol)
def test_4d_hypervolumetric(self):
    """Compare the SCA result with the naive one on seeded random input."""
    nlcpy.random.seed(0)
    xin = testing.shaped_random(self.shape, nlcpy).astype(self.dtype)

    if self.dtype == numpy.float32:
        rtol = TOL_SINGLE
    else:
        rtol = TOL_DOUBLE

    sca_options = dict(is_out=self.is_out, optimize=self.optimize)
    sca_res, sca_out = compute_with_sca(
        self.type, xin, self.stencil_scale, **sca_options)
    naive_res = compute_with_naive(self.type, xin, self.stencil_scale)

    if self.is_out:
        # With an explicit output the returned object must be that output.
        assert sca_res is sca_out
    testing.assert_allclose(sca_res, naive_res, rtol=rtol)
def test_from_so_c(self):
    """Set up the C ``test_sum`` kernel via ``_helper2`` and check the sum.

    The kernel receives the ``ve_adr`` of each array plus ``y.size``;
    afterwards ``y`` must equal ``x1 + x2``. A synchronous call must
    also return 0.
    """
    arg_types = (uint64,) * 4
    _helper2(self, _test_ve_adr_c, '/opt/nec/ve/bin/ncc', 'test_sum',
             _ctypes[self.dtype], arg_types, uint64)

    x1, x2, y = _prep(self.dtype)
    err = self.kern(
        x1.ve_adr, x2.ve_adr, y.ve_adr, y.size,
        sync=self.sync, callback=self.callback,
    )

    testing.assert_allclose(
        y, x1 + x2, err_msg='File-ID: {}'.format(self.lib.id))
    if self.sync:
        assert err == 0
def test_multi_ndarray_2(self, dtype):
    """Combine a 5x5 and a 7x7 input in one SCA description and check it.

    The reference crops the larger input to 5x5 and fills only the
    interior of a zero-filled 5x5 array.
    """
    xin = nlcpy.random.rand(5, 5).astype(dtype=dtype)
    yin = nlcpy.random.rand(7, 7).astype(dtype=dtype)

    # SCA side.
    dxin, dyin = nlcpy.sca.create_descriptor((xin, yin))
    desc = dxin[-1, 1] + dxin[1, -1] + dyin[-1, -1] + dyin[1, 1]
    res_sca = nlcpy.sca.create_kernel(desc).execute()

    # Reference side, written with shifted slices of the cropped inputs.
    x_common = xin[:, :]
    y_common = yin[:5, :5]
    x_part = x_common[:-2, 2:] + x_common[2:, :-2]
    res_naive = nlcpy.zeros((5, 5), dtype=dtype)
    res_naive[1:-1, 1:-1] = x_part + y_common[:-2, :-2] + y_common[2:, 2:]

    if dtype == numpy.float32:
        rtol = TOL_SINGLE
    else:
        rtol = TOL_DOUBLE
    testing.assert_allclose(res_sca, res_naive, rtol=rtol)
def test_3d_volumetric_with_factor(self):
    """Compare the SCA result with the naive one when a factor list is given.

    The factor is a plain Python list with one value per stencil element.
    """
    xin = testing.shaped_arange(self.shape, nlcpy).astype(self.dtype) * 0.1
    n_elem = get_n_stencil_elem(self.stencil_scale)
    scaled = nlcpy.arange(n_elem) * 0.1
    factor = scaled.tolist()

    if self.dtype == numpy.float32:
        rtol = TOL_SINGLE
    else:
        rtol = TOL_DOUBLE

    sca_options = dict(
        factor=factor,
        is_out=self.is_out,
        prefix=self.prefix,
        optimize=self.optimize,
    )
    sca_res, sca_out = compute_with_sca(
        self.type, xin, self.stencil_scale, **sca_options)
    naive_res = compute_with_naive(
        self.type, xin, self.stencil_scale, factor=factor)

    if self.is_out:
        # With an explicit output the returned object must be that output.
        assert sca_res is sca_out
    testing.assert_allclose(sca_res, naive_res, rtol=rtol)
def test_basic_f(self):
    """Build the Fortran ``test_sum_`` kernel and check that it adds arrays.

    The kernel receives the ``ve_adr`` of each array plus ``y.size``;
    afterwards ``y`` must equal ``x1 + x2``. A synchronous call must
    also return 0.
    """
    arg_types = (uint64,) * 4
    _helper1(self, _test_ve_adr_f, '/opt/nec/ve/bin/nfort', 'test_sum_',
             _ftypes[self.dtype], arg_types, uint64,
             ext_cflags=('-fpp', ))

    x1, x2, y = _prep(self.dtype)
    err = self.kern(
        x1.ve_adr, x2.ve_adr, y.ve_adr, y.size,
        sync=self.sync, callback=self.callback,
    )

    testing.assert_allclose(
        y, x1 + x2, err_msg='File-ID: {}'.format(self.lib.id))
    if self.sync:
        assert err == 0
def test_lapack_f(self):
    """Build and call the Fortran ``test_dgesv_`` kernel and check ``b``.

    The first ``n`` rows of ``b`` are compared with ``self._make_ref()``;
    a synchronous call must also return 0.
    """
    arg_types = (int64, int64, uint64, int64, uint64, uint64, int64)
    self._helper(_test_lapack_f, '/opt/nec/ve/bin/nfort',
                 'test_dgesv_', arg_types, int64)

    lna, n, lnb, m, a, b, ipvt = self._prep()
    err = self.kern(
        n, m, a.ve_adr, lna, ipvt.ve_adr, b.ve_adr, lnb,
        sync=self.sync, callback=self.callback,
    )

    actual = b[:n, :]
    expected = self._make_ref()
    testing.assert_allclose(
        actual, expected, rtol=1e-12,
        err_msg='File-ID: {}'.format(self.lib.id))
    if self.sync:
        assert err == 0