def test_sparse():
    """Check that kernel, setter and getter code of ``ListLbGenerator`` all mention ``num_cells``."""
    from lbmpy.creationfunctions import create_lb_collision_rule
    from pystencils import get_code_str

    generator = ListLbGenerator(create_lb_collision_rule())

    # The same check is applied, in this order, to the three ASTs the generator can produce.
    for ast_name in ('kernel', 'setter_ast', 'getter_ast'):
        generated = get_code_str(getattr(generator, ast_name)())
        assert 'num_cells' in generated
def test_sqrt_of_integer():
    """Regression test for a bug where ``sqrt(3)`` was classified as an integer.

    The same two assignments (``tmp = sqrt(3)``, ``f[0] = tmp``) are compiled twice: once for a
    field declared without an explicit dtype and run on a float64 array, and once for a float32
    field with ``data_type="float32"``. In both runs the stored value must lie in (1.7, 1.8).
    """
    # --- field without explicit dtype, float64 array --------------------------------------
    field_default = ps.fields("f: [1D]")
    helper = sp.symbols("tmp")
    eqs_default = [
        ps.Assignment(helper, sp.sqrt(3)),
        ps.Assignment(field_default[0], helper),
    ]
    arr_double = np.array([1], dtype=np.float64)
    compiled_default = ps.create_kernel(eqs_default).compile()
    compiled_default(f=arr_double)
    assert 1.7 < arr_double[0] < 1.8

    # --- float32 field, float32 kernel configuration ---------------------------------------
    field_single = ps.fields("f: float32[1D]")
    helper = sp.symbols("tmp")
    eqs_single = [
        ps.Assignment(helper, sp.sqrt(3)),
        ps.Assignment(field_single[0], helper),
    ]
    arr_single = np.array([1], dtype=np.float32)
    single_config = ps.CreateKernelConfig(data_type="float32")
    compiled_single = ps.create_kernel(eqs_single, config=single_config).compile()
    compiled_single(f=arr_single)

    generated = ps.get_code_str(compiled_single.ast)
    # 1.7320508075688772935 --> rounding to ...773 is actually correct. This was wrong before !282
    assert "1.7320508075688773f" in generated
    assert 1.7 < arr_single[0] < 1.8
def test_product(default_assignment_simplifications):
    """Compile a kernel that writes ``Product(k, (k, 1, 10))`` into an int64 field.

    When simplifications are explicitly ``False`` the generated code must contain an
    ``int64_t product`` declaration. In every case the kernel output has to equal the
    evaluated product.
    """
    index = ps.TypedSymbol('k', create_type('int64'))
    product_expr = sympy.Product(index, (index, 1, 10))
    evaluated = product_expr.doit()
    print(product_expr)
    print(evaluated)

    field = ps.fields('x: int64[1d]')
    collection = ps.AssignmentCollection({field.center(): product_expr})
    kernel_config = ps.CreateKernelConfig(
        default_assignment_simplifications=default_assignment_simplifications)

    ast = ps.create_kernel(collection, config=kernel_config)
    code = ps.get_code_str(ast)
    kernel = ast.compile()
    print(code)

    if default_assignment_simplifications is False:
        assert 'int64_t product' in code

    result = np.zeros((10, ), np.int64)
    kernel(x=result)
    expected = int(evaluated) * np.ones_like(result)
    assert np.allclose(result, expected)
def test_sum_use_float(default_assignment_simplifications):
    """Compile a float32 kernel that writes ``Sum(k, (k, 1, 100))`` into a float32 field.

    When simplifications are explicitly ``False`` the generated code must contain a
    ``float sum`` declaration. In every case the kernel output has to equal the evaluated sum.
    """
    index = sp.abc.k
    sum_expr = sympy.Sum(index, (index, 1, 100))
    evaluated = sum_expr.doit()
    print(sum_expr)
    print(evaluated)

    field = ps.fields('x: float32[1d]')
    collection = ps.AssignmentCollection({field.center(): sum_expr})
    kernel_config = ps.CreateKernelConfig(
        default_assignment_simplifications=default_assignment_simplifications,
        data_type=create_type('float32'))

    ast = ps.create_kernel(collection, config=kernel_config)
    code = ps.get_code_str(ast)
    kernel = ast.compile()
    print(code)

    if default_assignment_simplifications is False:
        assert 'float sum' in code

    result = np.zeros((10, ), np.float32)
    kernel(x=result)
    expected = int(evaluated) * np.ones_like(result)
    assert np.allclose(result, expected)
def generate_shared_object(output_folder=None,
                           source_files=None,
                           show_code=False,
                           framework_module_class=TorchModule,
                           generate_code_only=False,
                           update_repo_files=False):
    """Generate the ``pyronn_torch_cpp`` module and, unless told otherwise, compile it.

    Args:
        output_folder: Directory that receives ``pyronn_torch_cpp.so`` (and the repo files when
            ``update_repo_files`` is set). Falls back to ``../../generated_files`` relative to
            this file when falsy.
        source_files: Paths of the ``*.cu.cc`` sources. Falls back to every ``*.cu.cc`` file in
            the ``PYRO-NN-Layers`` directory next to this file when falsy.
        show_code: If true, display the generated module code via ``pystencils.show_code``.
        framework_module_class: Class instantiated with the module name and ``FUNCTIONS.values()``.
        generate_code_only: If true, return the module object without compiling anything.
        update_repo_files: If true, additionally copy the helper headers and the renamed ``*.cu``
            sources into ``output_folder`` and write ``pyronn_torch.cpp`` there.

    Returns:
        The uncompiled module when ``generate_code_only`` is true, otherwise whatever
        ``module.compile(...)`` returns.
    """
    module_name = 'pyronn_torch_cpp'
    here = dirname(__file__)
    layers_dir = join(here, 'PYRO-NN-Layers')
    headers_src = join(layers_dir, 'helper_headers')
    build_dir = join(get_cache_config()['object_cache'], module_name)

    if not output_folder:
        output_folder = join(here, '..', '..', 'generated_files')
    if not source_files:
        source_files = glob(join(layers_dir, '*.cu.cc'))

    # Refresh the helper headers in the build directory (and in the repo copy if requested).
    makedirs(build_dir, exist_ok=True)
    rmtree(join(build_dir, 'helper_headers'), ignore_errors=True)
    copytree(headers_src, join(build_dir, 'helper_headers'))
    if update_repo_files:
        rmtree(join(output_folder, 'helper_headers'), ignore_errors=True)
        copytree(headers_src, join(output_folder, 'helper_headers'))

    cuda_sources = []
    for source in source_files:
        cuda_name = basename(source).replace('.cu.cc', '.cu')  # Torch only accepts *.cu as CUDA
        cached_copy = join(build_dir, cuda_name)
        copyfile(source, cached_copy)
        cuda_sources.append(cached_copy)
        if update_repo_files:
            copyfile(source, join(output_folder, cuda_name))

    module = framework_module_class(module_name, FUNCTIONS.values())

    if show_code:
        pystencils.show_code(module, custom_backend=FrameworkIntegrationPrinter())

    if generate_code_only:
        return module

    extension = module.compile(extra_source_files=cuda_sources,
                               extra_cuda_flags=['-arch=sm_35'],
                               with_cuda=True,
                               compile_module_name=module_name)

    # Without update_repo_files nothing above has created output_folder; make sure it exists so
    # the copy below cannot fail with FileNotFoundError after the compile has already run.
    makedirs(output_folder, exist_ok=True)
    copyfile(module.compiled_file, join(output_folder, module_name + '.so'))

    if update_repo_files:
        with open(join(output_folder, 'pyronn_torch.cpp'), 'w') as f:
            f.write(pystencils.get_code_str(module, custom_backend=FrameworkIntegrationPrinter()))

    return extension
def test_integer_comparision():
    """Check the code generated for a ``Piecewise`` whose condition compares a symbol with 1."""
    field = ps.fields("f [2D]")
    direction = sp.Symbol("dir")

    center = field[0, 0]
    branches = sp.Piecewise(
        (0, sp.Equality(direction, 1)),
        (field[0, 0], True),
    )
    update = ps.Assignment(center, branches)

    generated = ps.get_code_str(ps.create_kernel(update))
    expected_line = "_data_f_00[_stride_f_1*ctr_1] = ((((dir) == (1))) ? (0.0): (_data_f_00[_stride_f_1*ctr_1]));"
    assert expected_line in generated
def test_abs():
    """``Abs`` of a value cast to int64 must not be emitted as ``fabs(`` in GPU-target code."""
    x, y, z = ps.fields('x, y, z: float64[2d]')
    int_type = create_type('int64')

    as_integer = cast_func(y[0, 0], int_type)
    collection = ps.AssignmentCollection({x[0, 0]: sympy.Abs(as_integer)})

    gpu_config = ps.CreateKernelConfig(target=ps.Target.GPU)
    kernel_ast = ps.create_kernel(collection, config=gpu_config)
    generated = ps.get_code_str(kernel_ast)
    print(generated)

    assert 'fabs(' not in generated
def test_split_inner_loop():
    """Count the ``for`` occurrences in generated code with and without ``split_groups`` hints."""
    dst = ps.fields('dst(8): double[2D]')
    s = sp.symbols('s_:8')
    x = sp.symbols('x')

    subexpressions = []
    # One store per field component, followed by the sum of all eight symbols.
    main = [Assignment(dst[0, 0](i), s[i]) for i in range(8)]
    main.append(Assignment(x, sum(s)))
    ac = AssignmentCollection(main, subexpressions)

    # Components are grouped pairwise; the last group additionally carries x.
    split_groups = [[dst[0, 0](2 * i), dst[0, 0](2 * i + 1)] for i in range(4)]
    split_groups[-1].append(x)
    ac.simplification_hints['split_groups'] = split_groups

    cpu_code = ps.get_code_str(ps.create_kernel(ac))
    # we have four inner loops as indicated in split groups (4 elements) plus one outer loop
    assert cpu_code.count('for') == 5

    gpu_code = ps.get_code_str(ps.create_kernel(ac, target=ps.Target.GPU))
    # on GPUs it wouldn't be good to use loop splitting
    assert gpu_code.count('for') == 0

    # Same assignments without the split hint.
    ac = AssignmentCollection(main, subexpressions)
    plain_code = ps.get_code_str(ps.create_kernel(ac))
    # one inner loop and one outer loop
    assert plain_code.count('for') == 2
def test_creation(method_enum, double_precision):
    """Simple test that makes sure that only variables of the requested precision are created"""
    if double_precision:
        data_type, expected, forbidden = "float64", 'double', 'float'
    else:
        data_type, expected, forbidden = "float32", 'float', 'double'

    lbm_config = LBMConfig(method=method_enum, relaxation_rate=1.5)
    config = ps.CreateKernelConfig(data_type=data_type)
    func = create_lb_function(lbm_config=lbm_config, config=config)

    code = ps.get_code_str(func)
    assert forbidden not in code
    assert expected in code
def test_complex_numbers_64(assignment, target):
    """Generate and compile a ``data_type='double'`` kernel for ``assignment`` on ``target``.

    The generated code must not contain "Not supported". For the GPU target the compile step is
    skipped when ``pycuda`` cannot be imported.
    """
    kernel_ast = pystencils.create_kernel(assignment, target=target, data_type='double')
    generated = pystencils.get_code_str(kernel_ast)
    print(generated)
    assert "Not supported" not in generated

    gpu_requested = target == pystencils.Target.GPU
    if gpu_requested:
        pytest.importorskip('pycuda')

    compiled = kernel_ast.compile()
    assert compiled is not None
def test_scenario(method_enum, double_precision):
    """Run one step of a lid driven cavity and check the precision used in its kernel code."""
    if double_precision:
        data_type, expected, forbidden = "double", 'double', 'float'
    else:
        data_type, expected, forbidden = "float32", 'float', 'double'

    lbm_config = LBMConfig(method=method_enum, relaxation_rate=1.5)
    config = ps.CreateKernelConfig(data_type=data_type)
    scenario = create_lid_driven_cavity((16, 16, 8), lbm_config=lbm_config, config=config)
    scenario.run(1)

    code = ps.get_code_str(scenario.ast)
    assert forbidden not in code
    assert expected in code
def test_evaluate_constant_terms(target, simplification):
    """With simplifications on, the constant ``cos(1)`` must not appear as a ``cos(`` call."""
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")

    src, dst = ps.fields('src, dst: float32[2d]')

    # Triggers Sympy's cos optimization
    rhs = -sp.cos(1) + dst[0, 0]
    collection = ps.AssignmentCollection({src[0, 0]: rhs})

    kernel_config = ps.CreateKernelConfig(
        target=target, default_assignment_simplifications=simplification)
    kernel_ast = ps.create_kernel(collection, config=kernel_config)
    code = ps.get_code_str(kernel_ast)

    if simplification:
        assert 'cos(' not in code
    else:
        assert 'cos(' in code
    print(code)
def test_aligned_and_nt_stores(instruction_set=instruction_set, openmp=False):
    """Vectorize a four-neighbour average with aligned, non-temporal stores and run it.

    Checks that the instruction-set table of the kernel offers the expected streaming or
    cacheline instructions for the given ``instruction_set``, and that every such instruction
    present in the table shows up in the generated code. Finally the summed result field is
    compared against the number of cells in the domain.
    """
    domain_size = (24, 24)
    # create a datahandling object
    dh = ps.create_data_handling(domain_size,
                                 periodicity=(True, True),
                                 parallel=False,
                                 default_target=Target.CPU)

    # fields
    alignment = 'cacheline' if openmp else True
    g = dh.add_array("g", values_per_cell=1, alignment=alignment)
    dh.fill("g", 1.0, ghost_layers=True)
    f = dh.add_array("f", values_per_cell=1, alignment=alignment)
    dh.fill("f", 0.0, ghost_layers=True)

    vectorize_info = {
        'instruction_set': instruction_set,
        'assume_aligned': True,
        'nontemporal': True,
        'assume_inner_stride_one': True,
    }
    neighbour_sum = g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1]
    update_rule = [ps.Assignment(f.center(), 0.25 * neighbour_sum)]
    config = ps.CreateKernelConfig(target=dh.default_target,
                                   cpu_vectorize_info=vectorize_info,
                                   cpu_openmp=openmp)
    ast = ps.create_kernel(update_rule, config=config)

    if instruction_set == 'sse' or instruction_set.startswith('avx'):
        assert 'stream' in ast.instruction_set
        assert 'streamFence' in ast.instruction_set
    if instruction_set in ('neon', 'vsx') or instruction_set.startswith('sve'):
        assert 'cachelineZero' in ast.instruction_set
    if instruction_set == 'vsx':
        assert 'storeAAndFlushCacheline' in ast.instruction_set

    special_instructions = ('stream', 'streamFence', 'cachelineZero',
                            'storeAAndFlushCacheline', 'flushCacheline')
    for name in special_instructions:
        if name not in ast.instruction_set:
            continue
        # Only the part of the instruction template before the first '{' is searched for.
        template_prefix = ast.instruction_set[name].split('{')[0]
        assert template_prefix in ps.get_code_str(ast)

    kernel = ast.compile()
    dh.run_kernel(kernel)
    np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size))
def test_type_interference():
    """Check the C types declared for intermediate symbols in the generated kernel code."""
    x = pystencils.fields('x: float32[3d]')

    ten_as_double = cast_func(10, create_type('float64'))
    ten_as_uint16 = cast_func(10, create_type('uint16'))

    # Insertion order is kept identical to the order in which the symbols depend on each other.
    equations = {}
    equations[a] = ten_as_double
    equations[b] = ten_as_uint16
    equations[e] = 11
    equations[c] = b
    equations[f] = c + b
    equations[d] = c + b + x.center + e
    equations[x.center] = c + b + x.center
    assignments = pystencils.AssignmentCollection(equations)

    kernel_ast = pystencils.create_kernel(assignments)
    code = str(pystencils.get_code_str(kernel_ast))

    for declaration in ('double a', 'uint16_t b', 'uint16_t f', 'int64_t e'):
        assert declaration in code
def test_sympy_optimizations(target, simplification):
    """``exp(v) - 1`` must be emitted as ``expm1(`` exactly when simplifications are enabled."""
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")

    src, dst = ps.fields('src, dst: float32[2d]')

    # Triggers Sympy's expm1 optimization.
    # That optimization is tedious to use and its behaviour depends heavily on the sympy
    # version. In some cases the exp expression has to be wrapped in brackets or multiplied
    # with 1 or 1.0 for sympy to work properly ...
    rhs = 1.0 * (sp.exp(dst[0, 0]) - 1)
    collection = ps.AssignmentCollection({src[0, 0]: rhs})

    kernel_config = ps.CreateKernelConfig(
        target=target, default_assignment_simplifications=simplification)
    kernel_ast = ps.create_kernel(collection, config=kernel_config)
    code = ps.get_code_str(kernel_ast)

    if simplification:
        assert 'expm1(' in code
    else:
        assert 'expm1(' not in code
def test_sympy_assignment(default_assignment_simplifications):
    """Check ``log`` handling in generated code and ``SympyAssignment.replace`` on lhs/rhs."""
    lhs = dst[0, 0](0)
    rhs = sp.log(x + 3) / sp.log(2) + sp.log(x**2 + 1)
    assignment = SympyAssignment(lhs, rhs)

    kernel_config = ps.CreateKernelConfig(
        default_assignment_simplifications=default_assignment_simplifications)
    kernel_ast = ps.create_kernel([assignment], config=kernel_config)
    code = ps.get_code_str(kernel_ast)

    if default_assignment_simplifications:
        assert 'log1p' in code
    else:
        # no optimisations will be applied so the optimised version of log will not be in the code
        assert 'log1p' not in code
    # With simplifications the constant term is directly evaluated, without them nothing is
    # rewritten; either way no log2 call is expected.
    assert 'log2' not in code

    new_lhs = dst[0, 0](1)
    assignment.replace(assignment.lhs, new_lhs)
    assignment.replace(assignment.rhs, sp.log(2))

    assert assignment.lhs == dst[0, 0](1)
    assert assignment.rhs == sp.log(2)
def test_issue40(*_):
    """https://i10git.cs.fau.de/pycodegen/pystencils/-/issues/40"""
    vectorize_info = dict(
        instruction_set="avx512",
        assume_aligned=False,
        nontemporal=False,
        assume_inner_stride_one=True,
    )
    src = ps.fields("src(1): double[2D]", layout='fzyx')
    rho = sp.Symbol('rho')

    equations = [
        ps.Assignment(rho, 1.0),
        ps.Assignment(src[0, 0](0), sp.Rational(4, 9) * sp.Symbol('rho')),
    ]

    kernel_config = ps.CreateKernelConfig(target=Target.CPU,
                                          cpu_vectorize_info=vectorize_info,
                                          data_type='float64')
    generated = ps.get_code_str(ps.create_kernel(equations, config=kernel_config))

    assert 'epi32' not in generated
def code(self):
    """Return the result of ``pystencils.get_code_str`` for this object's ``ast``."""
    kernel_ast = self.ast
    return pystencils.get_code_str(kernel_ast)