def test_sum_use_float():
    """Compile a kernel that stores ``Sum(k, (k, 1, 100))`` in a float32 field.

    Asserts that the generated code contains ``float sum`` and that every
    entry of the output array matches the evaluated sum. ``k`` is not
    defined here; it is resolved from the enclosing module scope.
    """
    series = sympy.Sum(k, (k, 1, 100))
    closed_form = series.doit()
    print(series)
    print(closed_form)

    x_field = pystencils.fields('x: float32[1d]')
    assignments = pystencils.AssignmentCollection({x_field.center(): series})
    ast = pystencils.create_kernel(assignments,
                                   data_type=create_type('float32'))

    code = str(pystencils.show_code(ast))
    kernel = ast.compile()
    print(code)
    print(pystencils.show_code(ast))
    assert 'float sum' in code

    array = np.zeros((10, ), np.float32)
    kernel(x=array)
    expected = int(closed_form) * np.ones_like(array)
    assert np.allclose(array, expected)
def test_dynamic_matrix_location_dependent():
    """Generate code for a matrix-valued dynamic function of the loop position.

    The same kernel is produced twice: once with a ``std::function`` typed
    callee and once with a template-typed callee. The test is skipped when
    ``TypedMatrixSymbol`` cannot be imported.
    """
    try:
        from pystencils.data_types import TypedMatrixSymbol
    except ImportError:
        import pytest
        pytest.skip()

    x, y = pystencils.fields('x, y: float32[3d]')
    A = TypedMatrixSymbol('A', 3, 1, create_type('double'),
                          CustomCppType('Vector3<double>'))

    def show_kernel_for(callee_type):
        # Calls ``my_fun`` with the three symbols returned by
        # pystencils.x_vector(3) and writes the sum of the components to y.
        call = DynamicFunction(TypedSymbol('my_fun', callee_type),
                               A.dtype,
                               *pystencils.x_vector(3))
        assignments = pystencils.AssignmentCollection({
            A: call,
            y.center: A[0] + A[1] + A[2]
        })
        ast = pystencils.create_kernel(assignments)
        pystencils.show_code(ast, custom_backend=FrameworkIntegrationPrinter())

    show_kernel_for('std::function<Vector3<double>(int, int, int)>')
    show_kernel_for(TemplateType('Functor_T'))
def generate_shared_object(output_folder=None,
                           source_files=None,
                           show_code=False,
                           framework_module_class=TorchModule,
                           generate_code_only=False,
                           update_repo_files=False):
    """Generate and compile the ``pyronn_torch_cpp`` extension module.

    Args:
        output_folder: Destination for the compiled ``.so`` (and, with
            ``update_repo_files``, for the copied sources). Defaults to
            ``../../generated_files`` relative to this file.
        source_files: CUDA sources named ``*.cu.cc``. Defaults to all such
            files in the ``PYRO-NN-Layers`` directory next to this file.
        show_code: If true, display the generated module code.
        framework_module_class: Class instantiated with the module name and
            ``FUNCTIONS.values()``.
        generate_code_only: If true, return the module object without
            compiling it.
        update_repo_files: If true, also copy helper headers and sources
            into ``output_folder`` and write ``pyronn_torch.cpp`` there
            after compilation.

    Returns:
        The uncompiled module when ``generate_code_only`` is set, otherwise
        the value returned by ``module.compile``.
    """
    object_cache = get_cache_config()['object_cache']
    module_name = 'pyronn_torch_cpp'
    here = dirname(__file__)

    if not output_folder:
        output_folder = join(here, '..', '..', 'generated_files')
    if not source_files:
        source_files = glob(join(here, 'PYRO-NN-Layers', '*.cu.cc'))

    build_dir = join(object_cache, module_name)
    headers_src = join(here, 'PYRO-NN-Layers', 'helper_headers')

    cuda_sources = []
    makedirs(build_dir, exist_ok=True)

    # Replace any stale copy of the helper headers in the build directory.
    cached_headers = join(build_dir, 'helper_headers')
    rmtree(cached_headers, ignore_errors=True)
    copytree(headers_src, cached_headers)

    if update_repo_files:
        repo_headers = join(output_folder, 'helper_headers')
        rmtree(repo_headers, ignore_errors=True)
        copytree(headers_src, repo_headers)

    for source in source_files:
        # Torch only accepts *.cu as CUDA
        cu_name = basename(source).replace('.cu.cc', '.cu')
        cached_copy = join(build_dir, cu_name)
        copyfile(source, cached_copy)
        cuda_sources.append(cached_copy)
        if update_repo_files:
            copyfile(source, join(output_folder, cu_name))

    module = framework_module_class(module_name, FUNCTIONS.values())

    if show_code:
        pystencils.show_code(module,
                             custom_backend=FrameworkIntegrationPrinter())

    if generate_code_only:
        return module

    extension = module.compile(extra_source_files=cuda_sources,
                               extra_cuda_flags=['-arch=sm_35'],
                               with_cuda=True,
                               compile_module_name=module_name)

    copyfile(module.compiled_file, join(output_folder, module_name + '.so'))

    if update_repo_files:
        generated = pystencils.get_code_str(
            module, custom_backend=FrameworkIntegrationPrinter())
        with open(join(output_folder, 'pyronn_torch.cpp'), 'w') as f:
            f.write(generated)

    return extension
def test_genereric_projection():
    """Compile a forward projection from a 3D volume to 2D projections on GPU.

    The projection matrix consists of float32 symbols named ``T`` created
    with dimensions 3 and 4. Only code generation and display are
    exercised; no numerical result is checked.
    """
    volume = pystencils.fields('volume: float32[3d]')
    projections = pystencils.fields('projections: float32[2D]')

    float32 = pystencils.data_types.create_type('float32')
    projection_matrix = pystencils_reco.matrix_symbols('T', float32, 3, 4)

    assignments = forward_projection(volume, projections, projection_matrix)
    kernel = assignments.compile('gpu')
    pystencils.show_code(kernel)
def test_cuda_unknown():
    """Create and display a GPU kernel that calls the undefined function ``wtf``."""
    x, y = pystencils.fields('x,y: float32 [2d]')

    unknown_call = sympy.Function('wtf')(address_of(y.center()), 2)
    assignments = pystencils.AssignmentCollection({
        get_dummy_symbol(): unknown_call,
    })

    ast = pystencils.create_kernel(assignments, target=Target.GPU)
    pystencils.show_code(ast)
def test_custom_backends_cpu():
    """Check that CPU compilation fails when ``ScreamingBackend`` prints the code.

    The kernel is displayed with the custom backend, then compiling it
    through ``cpujit.make_python_function`` must raise
    ``CalledProcessError``.
    """
    # Locals deliberately follow the original unpacking order: the second
    # local is bound to field "y" and the third to field "x".
    out, first, second = pystencils.fields("z, y, x: [2d]")

    rhs = first[0, 0] * sympy.log(first[0, 0] * second[0, 0])
    normal_assignments = pystencils.AssignmentCollection(
        [pystencils.Assignment(out[0, 0], rhs)], [])

    ast = pystencils.create_kernel(normal_assignments, target=Target.CPU)
    pystencils.show_code(ast, ScreamingBackend())

    with pytest.raises(CalledProcessError):
        pystencils.cpu.cpujit.make_python_function(
            ast, custom_backend=ScreamingBackend())
def test_cuda_but_not_c():
    """Create and display a CPU kernel that uses ``atomicAdd`` and ``rsqrtf``."""
    x, y = pystencils.fields('x,y: float32 [2d]')

    atomic_add = sympy.Function('atomicAdd')
    rsqrtf = sympy.Function('rsqrtf')
    assignments = pystencils.AssignmentCollection({
        get_dummy_symbol(): atomic_add(address_of(y.center()), 2),
        y.center(): rsqrtf(x[0, 0])
    })

    ast = pystencils.create_kernel(assignments, target=Target.CPU)
    pystencils.show_code(ast)
def test_destructuring_field_class():
    """Wrap a GPU kernel body in ``DestructuringBindingsForFieldClass`` and compile.

    The code is printed before and after the body is replaced.
    """
    # Locals follow the original unpacking order: the second local is bound
    # to field "y" and the third to field "x".
    out, first, second = pystencils.fields("z, y, x: [2d]")

    rhs = first[0, 0] * sympy.log(first[0, 0] * second[0, 0])
    normal_assignments = pystencils.AssignmentCollection(
        [pystencils.Assignment(out[0, 0], rhs)], [])

    ast = pystencils.create_kernel(normal_assignments, target='gpu')
    print(pystencils.show_code(ast))

    ast.body = DestructuringBindingsForFieldClass(ast.body)
    print(pystencils.show_code(ast))

    ast.compile()
def test_opencl_jit_with_parameter():
    """Run one kernel with a scalar parameter through CUDA and OpenCL and compare.

    The kernel computes ``z = x * log(x * y) + a``. It is executed with
    ``a=5.`` on random 20x30 inputs via the CUDA JIT and via the OpenCL
    JIT; both results must agree within ``np.allclose`` tolerance.
    """
    z_field, y_field, x_field = pystencils.fields("z, y, x: [2d]")
    a = sp.Symbol('a')

    rhs = x_field[0, 0] * sp.log(x_field[0, 0] * y_field[0, 0]) + a
    assignments = pystencils.AssignmentCollection({z_field[0, 0]: rhs})
    print(assignments)

    ast = pystencils.create_kernel(assignments, target='gpu')
    print(ast)

    code = pystencils.show_code(ast, custom_backend=CudaBackend())
    print(code)
    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
    print(opencl_code)

    cuda_kernel = ast.compile()
    assert cuda_kernel is not None

    import pycuda.gpuarray as gpuarray

    x_cpu = np.random.rand(20, 30)
    y_cpu = np.random.rand(20, 30)
    z_cpu = np.random.rand(20, 30)

    # CUDA run
    x_gpu = gpuarray.to_gpu(x_cpu)
    y_gpu = gpuarray.to_gpu(y_cpu)
    z_gpu = gpuarray.to_gpu(z_cpu)
    cuda_kernel(x=x_gpu, y=y_gpu, z=z_gpu, a=5.)
    result_cuda = z_gpu.get()

    # OpenCL run on the same host data
    import pyopencl.array as array
    ctx = cl.create_some_context(0)
    queue = cl.CommandQueue(ctx)

    x_cl = array.to_device(queue, x_cpu)
    y_cl = array.to_device(queue, y_cpu)
    z_cl = array.to_device(queue, z_cpu)

    opencl_kernel = make_python_function(ast, queue, ctx)
    assert opencl_kernel is not None
    opencl_kernel(x=x_cl, y=y_cl, z=z_cl, a=5.)
    result_opencl = z_cl.get(queue)

    assert np.allclose(result_cuda, result_opencl)
def test_address_of_with_cse():
    """Generate code for address-of expressions before and after ``sympy_cse``.

    Both assignments cast the address of ``x[0, 0]`` to ``int64``, so the
    two right-hand sides share a sub-expression.
    """
    x, y = pystencils.fields('x,y: int64[2d]')
    s = pystencils.TypedSymbol('s', PointerType(create_type('int64')))

    def address_as_int64():
        # Built afresh for each use, as in the unrolled form of this test.
        return cast_func(address_of(x[0, 0]), create_type('int64'))

    assignments = pystencils.AssignmentCollection({
        y[0, 0]: address_as_int64() + s,
        x[0, 0]: address_as_int64() + 1
    }, {})

    ast = pystencils.create_kernel(assignments)
    pystencils.show_code(ast)

    assignments_cse = sympy_cse(assignments)
    ast = pystencils.create_kernel(assignments_cse)
    pystencils.show_code(ast)
def test_address_of_with_cse():
    """Generate code for address-of expressions before and after ``sympy_cse``.

    Only the code generated from the CSE-processed assignments is printed.
    """
    x, y = pystencils.fields('x,y: int64[2d]')

    def address_as_int64():
        # Built afresh for each use, as in the unrolled form of this test.
        return cast_func(address_of(x[0, 0]), 'int64')

    assignments = pystencils.AssignmentCollection(
        {
            y[0, 0]: address_as_int64(),
            x[0, 0]: address_as_int64() + 1
        }, {})

    plain_ast = pystencils.create_kernel(assignments)
    pystencils.show_code(plain_ast)

    assignments_cse = sympy_cse(assignments)
    cse_ast = pystencils.create_kernel(assignments_cse)
    code = pystencils.show_code(cse_ast)
    print(code)
def test_custom_backends_gpu():
    """Check that GPU compilation fails when ``ScreamingGpuBackend`` prints the code.

    Skipped when ``pycuda`` cannot be imported. Compiling through
    ``cudajit.make_python_function`` must raise
    ``pycuda.driver.CompileError``.
    """
    pytest.importorskip('pycuda')
    import pycuda.driver
    import pystencils.gpucuda.cudajit

    # Locals follow the original unpacking order: the second local is bound
    # to field "y" and the third to field "x".
    out, first, second = pystencils.fields("z, y, x: [2d]")

    rhs = first[0, 0] * sympy.log(first[0, 0] * second[0, 0])
    normal_assignments = pystencils.AssignmentCollection(
        [pystencils.Assignment(out[0, 0], rhs)], [])

    ast = pystencils.create_kernel(normal_assignments, target=Target.GPU)
    pystencils.show_code(ast, ScreamingGpuBackend())

    with pytest.raises(pycuda.driver.CompileError):
        pystencils.gpucuda.cudajit.make_python_function(
            ast, custom_backend=ScreamingGpuBackend())
def test_prod_var_limit():
    """Compile a kernel for ``Sum(k, (k, 1, limit))`` with a runtime upper limit.

    The kernel is called with ``limit=100`` and every entry of the int64
    output array must equal the sum evaluated with that limit substituted.
    """
    int64 = 'int64'
    k = pystencils.TypedSymbol('k', create_type(int64))
    limit = pystencils.TypedSymbol('limit', create_type(int64))

    series = sympy.Sum(k, (k, 1, limit))
    closed_form = series.replace(limit, 100).doit()
    print(series)
    print(closed_form)

    x_field = pystencils.fields('x: int64[1d]')
    assignments = pystencils.AssignmentCollection({x_field.center(): series})
    ast = pystencils.create_kernel(assignments)

    code = str(pystencils.show_code(ast))
    kernel = ast.compile()
    print(code)

    array = np.zeros((10, ), np.int64)
    kernel(x=array, limit=100)
    expected = int(closed_form) * np.ones_like(array)
    assert np.allclose(array, expected)
def test_typed_matrix():
    """Generate code for a kernel that reads entries of a ``TypedMatrixSymbol``.

    Skipped when ``TypedMatrixSymbol`` cannot be imported.
    """
    try:
        from pystencils.data_types import TypedMatrixSymbol
    except ImportError:
        import pytest
        pytest.skip()

    x, y = pystencils.fields('x, y: float32[3d]')
    A = TypedMatrixSymbol('A', 3, 1, create_type('double'),
                          CustomCppType('Vector3<real_t>'))

    component_sum = A[0] + A[1] + A[2]
    assignments = pystencils.AssignmentCollection({y.center: component_sum})

    ast = pystencils.create_kernel(assignments)
    pystencils.show_code(ast, custom_backend=FrameworkIntegrationPrinter())
def test_source_code_comment():
    """Append comment and empty-line nodes to a CPU kernel and compile it."""
    a, b = pystencils.fields('a,b: float[2D]')
    assignments = pystencils.AssignmentCollection(
        {a.center(): b[0, 2] + b[0, 0]}, {})

    config = pystencils.CreateKernelConfig(target=pystencils.Target.CPU)
    ast = pystencils.create_kernel(assignments, config=config)

    nodes = pystencils.astnodes
    ast.body.append(nodes.SourceCodeComment("Hallo"))
    ast.body.append(nodes.EmptyLine())
    ast.body.append(nodes.SourceCodeComment("World!"))
    print(ast)

    compiled = ast.compile()
    assert compiled is not None

    pystencils.show_code(ast)
def test_cuda_known_functions():
    """Compile a GPU kernel that uses ``atomicAdd`` and ``rsqrtf``.

    The AST is created first; display and compilation are skipped when
    ``pycuda`` cannot be imported.
    """
    printer = CudaSympyPrinter()
    print(printer.known_functions)

    x, y = pystencils.fields('x,y: float32 [2d]')

    atomic_add = sympy.Function('atomicAdd')
    rsqrtf = sympy.Function('rsqrtf')
    assignments = pystencils.AssignmentCollection({
        get_dummy_symbol(): atomic_add(address_of(y.center()), 2),
        y.center(): rsqrtf(x[0, 0])
    })

    ast = pystencils.create_kernel(assignments, target=Target.GPU)

    pytest.importorskip('pycuda')
    pystencils.show_code(ast)
    kernel = ast.compile()
    assert kernel is not None
def test_custom_backends():
    """Check that code printed by the screaming backends fails to compile.

    The CPU path must raise ``CalledProcessError``; the GPU path must raise
    ``pycuda.driver.CompileError``.
    """
    # Locals follow the original unpacking order: the second local is bound
    # to field "y" and the third to field "x".
    out, first, second = pystencils.fields("z, y, x: [2d]")

    rhs = first[0, 0] * sympy.log(first[0, 0] * second[0, 0])
    normal_assignments = pystencils.AssignmentCollection(
        [pystencils.Assignment(out[0, 0], rhs)], [])

    # CPU
    cpu_ast = pystencils.create_kernel(normal_assignments, target='cpu')
    print(pystencils.show_code(cpu_ast, ScreamingBackend()))
    with pytest.raises(CalledProcessError):
        pystencils.cpu.cpujit.make_python_function(
            cpu_ast, custom_backend=ScreamingBackend())

    # GPU
    gpu_ast = pystencils.create_kernel(normal_assignments, target='gpu')
    print(pystencils.show_code(gpu_ast, ScreamingGpuBackend()))
    with pytest.raises(pycuda.driver.CompileError):
        pystencils.gpucuda.cudajit.make_python_function(
            gpu_ast, custom_backend=ScreamingGpuBackend())
def test_boundary_check(with_cse):
    """Run a four-neighbour average without ghost layers using boundary handling.

    Args:
        with_cse: Forwarded as the second argument of
            ``add_fixed_constant_boundary_handling``.

    The test passes when the kernel runs on a 1000x1000 array; no output
    values are checked.
    """
    f, g = ps.fields("f, g : [2D]")
    neighbour_sum = f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]
    stencil = ps.Assignment(g[0, 0], neighbour_sum / 4)

    f_arr = np.random.rand(1000, 1000)
    g_arr = np.zeros_like(f_arr)

    assignments = add_fixed_constant_boundary_handling(
        ps.AssignmentCollection([stencil]), with_cse)
    print(assignments)

    kernel_checked = ps.create_kernel(assignments, ghost_layers=0).compile()
    ps.show_code(kernel_checked)

    # No SEGFAULT, please!!
    kernel_checked(f=f_arr, g=g_arr)
def test_address_of():
    """Print generated code for ``address_of`` used via a symbol and inline.

    First the address of ``x[0, 0]`` is stored in pointer symbol ``s`` and
    cast to ``int64``; then the same cast is applied to the address
    expression directly.
    """
    x, y = pystencils.fields('x,y: int64[2d]')
    s = pystencils.TypedSymbol('s', PointerType('int64'))

    via_symbol = pystencils.AssignmentCollection(
        {
            s: address_of(x[0, 0]),
            y[0, 0]: cast_func(s, 'int64')
        }, {})
    ast = pystencils.create_kernel(via_symbol)
    print(pystencils.show_code(ast))

    inline = pystencils.AssignmentCollection(
        {y[0, 0]: cast_func(address_of(x[0, 0]), 'int64')}, {})
    ast = pystencils.create_kernel(inline)
    print(pystencils.show_code(ast))
def test_print_opencl():
    """Print a GPU kernel with the CUDA and OpenCL backends and check the latter.

    The OpenCL output must contain ``__global`` restrict-qualified pointer
    declarations and a ``get_local_id(0)`` call.
    """
    z, y, x = pystencils.fields("z, y, x: [2d]")

    rhs = x[0, 0] * sp.log(x[0, 0] * y[0, 0])
    assignments = pystencils.AssignmentCollection({z[0, 0]: rhs})
    print(assignments)

    ast = pystencils.create_kernel(assignments, target='gpu')
    print(ast)

    code = pystencils.show_code(ast, custom_backend=CudaBackend())
    print(code)

    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
    print(opencl_code)

    expected_fragments = (
        "__global double * RESTRICT const _data_x",
        "__global double * RESTRICT",
        "get_local_id(0)",
    )
    for fragment in expected_fragments:
        assert fragment in str(opencl_code)
def test_cuda_unknown():
    """Check that an undefined function is flagged in generated GPU code.

    The printed code must contain the text ``Not supported in CUDA``.
    """
    x, y = pystencils.fields('x,y: float32 [2d]')

    unknown_call = sympy.Function('wtf')(address_of(y.center()), 2)
    assignments = pystencils.AssignmentCollection({
        get_dummy_symbol(): unknown_call,
    })

    ast = pystencils.create_kernel(assignments, 'gpu')
    code = str(pystencils.show_code(ast))
    print(code)

    assert "Not supported in CUDA" in code
def test_address_of():
    """Check ``address_of`` properties and generate code that uses it.

    Asserts the canonical form and the pointer dtype of ``address_of`` for
    a field access and for a plain symbol, then creates kernels that use
    the address via a pointer symbol and inline.
    """
    x, y = pystencils.fields('x,y: int64[2d]')
    s = pystencils.TypedSymbol('s', PointerType(create_type('int64')))

    assert address_of(x[0, 0]).canonical() == x[0, 0]
    assert address_of(x[0, 0]).dtype == PointerType(x[0, 0].dtype,
                                                    restrict=True)
    assert address_of(sp.Symbol("a")).dtype == PointerType('void',
                                                           restrict=True)

    via_symbol = pystencils.AssignmentCollection({
        s: address_of(x[0, 0]),
        y[0, 0]: cast_func(s, create_type('int64'))
    }, {})
    ast = pystencils.create_kernel(via_symbol)
    pystencils.show_code(ast)

    inline = pystencils.AssignmentCollection({
        y[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64'))
    }, {})
    ast = pystencils.create_kernel(inline)
    pystencils.show_code(ast)
def test_cuda_but_not_c():
    """Check that CUDA-only functions are flagged in generated CPU code.

    The printed code must contain the text ``Not supported``.
    """
    x, y = pystencils.fields('x,y: float32 [2d]')

    atomic_add = sympy.Function('atomicAdd')
    rsqrtf = sympy.Function('rsqrtf')
    assignments = pystencils.AssignmentCollection({
        get_dummy_symbol(): atomic_add(address_of(y.center()), 2),
        y.center(): rsqrtf(x[0, 0])
    })

    ast = pystencils.create_kernel(assignments, 'cpu')
    code = str(pystencils.show_code(ast))

    assert "Not supported" in code
def test_dynamic_matrix():
    """Generate code for a dynamic function whose result is a matrix symbol.

    Skipped when ``TypedMatrixSymbol`` cannot be imported.
    """
    x, y = pystencils.fields('x, y: float32[3d]')

    try:
        from pystencils.data_types import TypedMatrixSymbol
    except ImportError:
        import pytest
        pytest.skip()

    a = sp.symbols('a')
    A = TypedMatrixSymbol('A', 3, 1, create_type('double'), 'Vector3<double>')

    callee = TypedSymbol('my_fun', 'std::function<Vector3<double>(double)>')
    my_fun_call = DynamicFunction(callee, A.dtype, a)

    assignments = pystencils.AssignmentCollection({
        A: my_fun_call,
        y.center: A[0] + A[1] + A[2]
    })

    ast = pystencils.create_kernel(assignments)
    pystencils.show_code(ast, custom_backend=FrameworkIntegrationPrinter())
def test_dynamic_function():
    """Generate code for kernels that add a ``DynamicFunction`` call to a field.

    Two callee types are used: a ``std::function`` taking one argument and
    a template-typed functor called with two arguments.
    """
    x, y = pystencils.fields('x, y: float32[3d]')
    a = sp.symbols('a')

    def show_kernel_adding(call):
        # y.center = x.center + call, printed with the framework backend.
        assignments = pystencils.AssignmentCollection(
            {y.center: x.center + call})
        ast = pystencils.create_kernel(assignments)
        pystencils.show_code(ast, custom_backend=FrameworkIntegrationPrinter())

    my_fun_call = DynamicFunction(
        TypedSymbol('my_fun', 'std::function<double(double)>'),
        create_type('double'), a)
    show_kernel_adding(my_fun_call)

    template_fun_call = DynamicFunction(
        TypedSymbol('my_fun', TemplateType('Functor_T')),
        create_type('double'), a, x.center)
    show_kernel_adding(template_fun_call)
def to_dot(self, graph_style=None, with_code=False):
    """Build a ``graphviz.Digraph`` describing this computation graph.

    Every field in ``self.reads`` and ``self.writes`` becomes a node
    labelled ``"<name> #<counter>"``. Every entry of
    ``self.computation_nodes`` becomes either a nested subgraph (for a
    ``ComputationGraph``) or a node connected to the fields of its
    ``input_nodes`` and ``output_nodes``.

    Args:
        graph_style: Accepted for interface compatibility; this method
            does not apply it to the generated graph.
        with_code: If true, kernel nodes are labelled with the string form
            of ``pystencils.show_code(kernel)`` instead of the kernel's
            function name. Also forwarded to nested graphs.

    Returns:
        The populated ``graphviz.Digraph``.

    Raises:
        TypeError: If a computation node's kernel is neither a ``Swap``
            nor a ``KernelFunction``.
    """
    import graphviz

    def field_label(field):
        return f'{field.name} #{field.counter}'

    fields = {**self.reads, **self.writes}
    dot = graphviz.Digraph(str(id(self)))

    for field in fields:
        label = field_label(field)
        dot.node(label, style='filled', fillcolor='#a056db', label=label)

    for node in self.computation_nodes:
        if isinstance(node, ComputationGraph):
            # Nested graphs render themselves; they get no direct edges here.
            dot.subgraph(node.to_dot(with_code=with_code))
            continue

        node_id = str(id(node))
        if isinstance(node.kernel, Swap):
            dot.node(node_id,
                     style='filled',
                     fillcolor='#ff5600',
                     label=f'Swap {id(node)}')
        elif isinstance(node.kernel, KernelFunction):
            if with_code:
                name = str(pystencils.show_code(node.kernel))
            else:
                name = node.kernel.function_name
            dot.node(node_id, style='filled', fillcolor='#0056db', label=name)
        else:
            # Raising a plain string is itself a TypeError in Python 3, so
            # the exception type is unchanged; only the message is useful now.
            raise TypeError(
                'Cannot render computation node with kernel of type '
                f'{type(node.kernel).__name__}')

        for input_node in node.input_nodes:
            dot.edge(field_label(input_node.field), node_id)
        for output_node in node.output_nodes:
            dot.edge(node_id, field_label(output_node.field))

    return dot
def test_blocking_staggered():
    """Compare a CPU-blocked staggered kernel against the unblocked one.

    Both kernels are run on the same random input and their outputs must
    match under ``np.testing.assert_almost_equal``.
    """
    f, stag = ps.fields("f, stag(3): double[3D]")
    center = f[0, 0, 0]
    terms = [
        center - f[-1, 0, 0],
        center - f[0, -1, 0],
        center - f[0, 0, -1],
    ]

    blocked = ps.create_staggered_kernel(stag, terms,
                                         cpu_blocking=(3, 16, 8))
    kernel = blocked.compile()
    reference_kernel = ps.create_staggered_kernel(stag, terms).compile()
    print(ps.show_code(kernel.ast))

    shape = (80, 33, 19)
    f_arr = np.random.rand(*shape)
    stag_arr = np.zeros(shape + (3, ))
    stag_ref = np.zeros(shape + (3, ))

    kernel(f=f_arr, stag=stag_arr)
    reference_kernel(f=f_arr, stag=stag_ref)

    np.testing.assert_almost_equal(stag_arr, stag_ref)
def test_conv(input_channels, output_channels):
    """Compile and run a channel convolution with a 3x3 stencil field.

    Args:
        input_channels: Size of the index dimension of ``src``.
        output_channels: Size of the index dimension of ``dst``.

    Only successful compilation and execution are checked.
    """
    spatial_shape = [21, 31]
    src_arr = np.random.rand(*spatial_shape, input_channels)
    dst_arr = np.zeros(spatial_shape + [output_channels])
    stencil_arr = np.ones([3, 3, input_channels, output_channels]) / 9

    dst, src = ps.fields(
        f'dst({output_channels}), src({input_channels}): [2d]')
    stencil = ps.fields(f'stencil({input_channels}, {output_channels}): [3,3]')
    stencil.field_type = ps.field.FieldType.CUSTOM

    assignments = channel_convolution(src, stencil, dst)
    ast = ps.create_kernel(assignments)
    print(ps.show_code(ast))

    kernel = ast.compile()
    kernel(dst=dst_arr, src=src_arr, stencil=stencil_arr)
def test_conv_advanced(input_channels, output_channels):
    """Compile and run a channel convolution with a 5x4 stencil field.

    Args:
        input_channels: Size of the index dimension of ``src``.
        output_channels: Size of the index dimension of ``dst``.

    Only successful compilation and execution are checked.
    """
    filter_shape = (5, 4)
    rows, cols = filter_shape

    src_arr = np.random.rand(21, 31, input_channels)
    dst_arr = np.zeros([21, 31, output_channels])
    stencil_arr = np.ones([rows, cols, input_channels, output_channels
                           ]) / (5 * 4)

    dst, src = ps.fields(
        f'dst({output_channels}), src({input_channels}): [2d]')
    stencil = ps.fields(
        f'stencil({input_channels}, {output_channels}): [{rows}, {cols}]')
    stencil.field_type = ps.field.FieldType.CUSTOM

    assignments = channel_convolution(src, stencil, dst)
    ast = ps.create_kernel(assignments)
    print(ps.show_code(ast))

    kernel = ast.compile()
    kernel(dst=dst_arr, src=src_arr, stencil=stencil_arr)
def test_jacobi_fixed_field_size():
    """Compare a hand-built Jacobi kernel with a NumPy reference on a 30x20 field.

    The kernel AST is assembled manually from a single assignment, compiled
    and run; the summed absolute difference to the reference must be zero
    within ``atol=1e-13``. The displayed code must contain ``for`` both in
    its string form and in its HTML representation.
    """
    size = (30, 20)

    src_field_c = np.random.rand(*size)
    src_field_py = np.copy(src_field_c)
    dst_field_c = np.zeros(size)
    dst_field_py = np.zeros(size)

    f = Field.create_from_numpy_array("f", src_field_c)
    d = Field.create_from_numpy_array("d", dst_field_c)

    neighbour_sum = f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]
    jacobi = SympyAssignment(d[0, 0], neighbour_sum / 4)
    body = Block([jacobi])
    loop_node, gl_info = make_loop_over_domain(body)
    ast_node = KernelFunction(loop_node,
                              'cpu',
                              'c',
                              make_python_function,
                              ghost_layers=gl_info)
    resolve_field_accesses(ast_node)
    move_constants_before_loop(ast_node)

    # Reference result on the interior cells; the neighbours are added in
    # the order (x-1, x+1, y-1, y+1).
    dst_field_py[1:-1, 1:-1] = 0.25 * (src_field_py[:-2, 1:-1] +
                                       src_field_py[2:, 1:-1] +
                                       src_field_py[1:-1, :-2] +
                                       src_field_py[1:-1, 2:])

    kernel = ast_node.compile()
    kernel(f=src_field_c, d=dst_field_c)

    error = np.sum(np.abs(dst_field_py - dst_field_c))
    np.testing.assert_allclose(error, 0.0, atol=1e-13)

    code_display = show_code(ast_node)
    assert 'for' in str(code_display)
    assert 'for' in code_display._repr_html_()