def test_jacobi_variable_field_size():
    """Compare a compiled 3D Jacobi kernel on generic fields with a NumPy reference.

    The kernel averages the four neighbours in the first two coordinate
    directions; the reference result is computed on the interior cells only.
    """
    shape = (3, 3, 3)
    src = Field.create_generic("f", 3)
    dst = Field.create_generic("d", 3)

    neighbour_sum = src[1, 0, 0] + src[-1, 0, 0] + src[0, 1, 0] + src[0, -1, 0]
    update_rule = SympyAssignment(dst[0, 0, 0], neighbour_sum / 4)

    loop_node, gl_info = make_loop_over_domain(Block([update_rule]))
    ast_node = KernelFunction(loop_node, 'cpu', 'c', make_python_function, ghost_layers=gl_info)
    resolve_field_accesses(ast_node)
    move_constants_before_loop(ast_node)

    src_field_c = np.random.rand(*shape)
    src_field_py = np.copy(src_field_c)
    dst_field_c = np.zeros(shape)
    dst_field_py = np.zeros(shape)

    # Reference solution on the interior; summation order matches the kernel's
    # update rule term by term (x-1, x+1, y-1, y+1).
    dst_field_py[1:-1, 1:-1, 1:-1] = 0.25 * (
        src_field_py[:-2, 1:-1, 1:-1]
        + src_field_py[2:, 1:-1, 1:-1]
        + src_field_py[1:-1, :-2, 1:-1]
        + src_field_py[1:-1, 2:, 1:-1]
    )

    compiled_kernel = ast_node.compile()
    compiled_kernel(f=src_field_c, d=dst_field_c)

    total_abs_error = np.sum(np.abs(dst_field_py - dst_field_c))
    np.testing.assert_allclose(total_abs_error, 0.0, atol=1e-13)
def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel", type_info=None,
                                coordinate_names=('x', 'y', 'z'), indexing_creator=BlockIndexing):
    """Build a CUDA kernel AST that updates only the cells listed in index fields.

    Args:
        assignments: update rules; they are passed through ``add_types`` without
            the independence check
        index_fields: 1D fields with a struct data type whose elements provide the
            cell coordinates. Every passed field is marked as ``FieldType.INDEXED``.
        function_name: name given to the created ``KernelFunction``
        type_info: forwarded to ``add_types``
        coordinate_names: names of the struct elements holding the coordinates;
            only as many names as the non-index fields have spatial dimensions are used
        indexing_creator: callable invoked with ``field`` and ``iteration_slice``
            that returns the indexing object for the kernel

    Returns:
        The ``KernelFunction`` node, with the indexing object attached as ``indexing``.

    Raises:
        ValueError: if a required coordinate name is not an element of any index field.
    """
    fields_read, fields_written, assignments = add_types(assignments, type_info,
                                                         check_independence_condition=False)
    all_fields = fields_read.union(fields_written)
    read_only_fields = {field.name for field in fields_read - fields_written}

    for index_field in index_fields:
        index_field.field_type = FieldType.INDEXED
        assert FieldType.is_indexed(index_field)
        assert index_field.spatial_dimensions == 1, "Index fields have to be 1D"

    non_index_fields = [field for field in all_fields if field not in index_fields]
    distinct_dimensions = {field.spatial_dimensions for field in non_index_fields}
    assert len(distinct_dimensions) == 1, "Non-index fields do not have the same number of spatial coordinates"
    num_spatial_coordinates = list(distinct_dimensions)[0]

    def get_coordinate_symbol_assignment(name):
        # The first index field whose struct type contains `name` provides the coordinate.
        for candidate in index_fields:
            struct_type = candidate.dtype
            assert isinstance(struct_type, StructType), "Index fields have to have a struct data type"
            if struct_type.has_element(name):
                return SympyAssignment(TypedSymbol(name, np.int64), candidate[0](name))
        raise ValueError(f"Index {name} not found in any of the passed index fields")

    coordinate_symbol_assignments = [
        get_coordinate_symbol_assignment(coordinate_name)
        for coordinate_name in coordinate_names[:num_spatial_coordinates]
    ]
    coordinate_typed_symbols = [assignment.lhs for assignment in coordinate_symbol_assignments]

    first_index_field = list(index_fields)[0]
    full_slice = [slice(None, None, None)] * len(first_index_field.spatial_shape)
    indexing = indexing_creator(field=first_index_field, iteration_slice=full_slice)

    function_body = indexing.guard(Block(coordinate_symbol_assignments + assignments),
                                   get_common_shape(index_fields))
    kernel_ast = KernelFunction(function_body, Target.GPU, Backend.CUDA, make_python_function,
                                None, function_name, assignments=assignments)
    kernel_ast.global_variables.update(indexing.index_variables)

    base_pointer_spec = [['spatialInner0']]
    base_pointer_info = {
        field.name: parse_base_pointer_info(base_pointer_spec, [2, 1, 0],
                                            field.spatial_dimensions, field.index_dimensions)
        for field in all_fields
    }

    # Index fields are addressed through the indexing coordinates, all other
    # fields through the coordinate symbols read from the index field.
    indexing_coordinates = indexing.coordinates
    fixed_coordinates = {field.name: indexing_coordinates for field in index_fields}
    fixed_coordinates.update({field.name: coordinate_typed_symbols for field in non_index_fields})

    resolve_field_accesses(kernel_ast, read_only_fields,
                           field_to_fixed_coordinates=fixed_coordinates,
                           field_to_base_pointer_info=base_pointer_info)

    # add the function which determines #blocks and #threads as additional member to KernelFunction node
    # this is used by the jit
    kernel_ast.indexing = indexing
    return kernel_ast
def test_jacobi_fixed_field_size():
    """Compare a compiled 2D Jacobi kernel on fixed-size fields with a NumPy reference.

    Also checks that the code display of the kernel contains a ``for`` loop,
    both in its string form and in its HTML representation.
    """
    shape = (30, 20)

    src_field_c = np.random.rand(*shape)
    src_field_py = np.copy(src_field_c)
    dst_field_c = np.zeros(shape)
    dst_field_py = np.zeros(shape)

    src = Field.create_from_numpy_array("f", src_field_c)
    dst = Field.create_from_numpy_array("d", dst_field_c)

    neighbour_sum = src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]
    update_rule = SympyAssignment(dst[0, 0], neighbour_sum / 4)

    loop_node, gl_info = make_loop_over_domain(Block([update_rule]))
    ast_node = KernelFunction(loop_node, 'cpu', 'c', make_python_function, ghost_layers=gl_info)
    resolve_field_accesses(ast_node)
    move_constants_before_loop(ast_node)

    # Reference solution on the interior; summation order matches the
    # element-wise loop formulation (x-1, x+1, y-1, y+1).
    dst_field_py[1:-1, 1:-1] = 0.25 * (
        src_field_py[:-2, 1:-1]
        + src_field_py[2:, 1:-1]
        + src_field_py[1:-1, :-2]
        + src_field_py[1:-1, 2:]
    )

    compiled_kernel = ast_node.compile()
    compiled_kernel(f=src_field_c, d=dst_field_c)

    total_abs_error = np.sum(np.abs(dst_field_py - dst_field_c))
    np.testing.assert_allclose(total_abs_error, 0.0, atol=1e-13)

    code_display = show_code(ast_node)
    assert 'for' in str(code_display)
    assert 'for' in code_display._repr_html_()
def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "kernel", type_info='double',
                  split_groups=(), iteration_slice=None, ghost_layers=None,
                  skip_independence_check=False) -> KernelFunction:
    """Creates an abstract syntax tree for a kernel function, by taking a list of update rules.

    Loops are created according to the field accesses in the equations.

    Args:
        assignments: list of sympy equations, containing accesses to :class:`pystencils.field.Field`.
            Defining the update rules of the kernel
        function_name: name of the generated function - only important if generated code is written out
        type_info: a map from symbol name to a C type specifier. If not specified all symbols are assumed to
            be of type 'double' except symbols which occur on the left hand side of equations where the
            right hand side is a sympy Boolean which are assumed to be 'bool' .
            When a single type name (a string such as the default ``'double'``) or any object without
            ``__getitem__`` is given, plain symbols in ``split_groups`` are typed with
            ``create_type(type_info)``; otherwise their type is looked up by symbol name.
        split_groups: Specification on how to split up inner loop into multiple loops. For details see
            transformation :func:`pystencils.transformation.split_inner_loop`
        iteration_slice: if not None, iteration is done only over this slice of the field
        ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
            if None, the number of ghost layers is determined automatically and assumed to be equal for
            all dimensions
        skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
            periodicity kernel, that access the field outside the iteration bounds. Use with care!

    Returns:
        AST node representing a function, that can be printed as C or CUDA code

    Raises:
        ValueError: if an entry of ``split_groups`` is neither a field access nor a symbol.
    """

    def type_symbol(term):
        # Field accesses and already typed symbols are used unchanged.
        if isinstance(term, (Field.Access, TypedSymbol)):
            return term
        elif isinstance(term, sp.Symbol):
            # A string like 'double' also provides __getitem__, so it has to be ruled out
            # explicitly; otherwise it would be indexed with the symbol name and raise TypeError.
            if isinstance(type_info, str) or not hasattr(type_info, '__getitem__'):
                return TypedSymbol(term.name, create_type(type_info))
            else:
                return TypedSymbol(term.name, type_info[term.name])
        else:
            raise ValueError("Term has to be field access or symbol")

    fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check)
    all_fields = fields_read.union(fields_written)
    read_only_fields = {f.name for f in fields_read - fields_written}

    # Buffer fields get their own base pointer specification and access resolution below.
    buffers = {f for f in all_fields if FieldType.is_buffer(f)}
    fields_without_buffers = all_fields - buffers

    body = ast.Block(assignments)
    loop_order = get_optimal_loop_ordering(fields_without_buffers)
    loop_node, ghost_layer_info = make_loop_over_domain(body, iteration_slice=iteration_slice,
                                                        ghost_layers=ghost_layers, loop_order=loop_order)
    ast_node = KernelFunction(loop_node, 'cpu', 'c', compile_function=make_python_function,
                              ghost_layers=ghost_layer_info, function_name=function_name)

    if split_groups:
        typed_split_groups = [[type_symbol(s) for s in split_group] for split_group in split_groups]
        split_inner_loop(ast_node, typed_split_groups)

    base_pointer_spec = [['spatialInner0'], ['spatialInner1']] if len(loop_order) >= 2 else [['spatialInner0']]
    base_pointer_info = {
        field.name: parse_base_pointer_info(base_pointer_spec, loop_order,
                                            field.spatial_dimensions, field.index_dimensions)
        for field in fields_without_buffers
    }

    buffer_base_pointer_info = {
        field.name: parse_base_pointer_info([['spatialInner0']], [0],
                                            field.spatial_dimensions, field.index_dimensions)
        for field in buffers
    }
    base_pointer_info.update(buffer_base_pointer_info)

    # Buffer accesses are resolved before the remaining field accesses.
    if any(FieldType.is_buffer(f) for f in all_fields):
        resolve_buffer_accesses(ast_node, get_base_buffer_index(ast_node), read_only_fields)
    resolve_field_accesses(ast_node, read_only_fields, field_to_base_pointer_info=base_pointer_info)
    move_constants_before_loop(ast_node)
    return ast_node
def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, function_name="kernel",
                          type_info=None, coordinate_names=('x', 'y', 'z')) -> KernelFunction:
    """Create a kernel that visits only the cells whose coordinates are listed in an index field.

    In contrast to :func:`create_kernel`, the generated function does not loop over the
    complete domain. It runs a single 1D loop over the entries of the first index field;
    the coordinates of the cell to update are read from the index fields. This kind of
    traversal can e.g. be used for boundary handling.

    An index field is a one dimensional array with a struct data type. The struct has to
    provide elements named 'x', 'y' and, for 3D fields, 'z' - these names can be changed
    through ``coordinate_names``. Further struct elements may be read and written in the
    kernel, for example boundary parameters.

    Args:
        assignments: list of assignments
        index_fields: list of index fields, i.e. 1D fields with struct data type
        function_name: see documentation of :func:`create_kernel`
        type_info: see documentation of :func:`create_kernel`
        coordinate_names: name of the coordinate fields in the struct data type

    Returns:
        The ``KernelFunction`` node of the created kernel.

    Raises:
        ValueError: if a required coordinate name is not an element of any index field.
    """
    fields_read, fields_written, assignments = add_types(assignments, type_info,
                                                         check_independence_condition=False)
    all_fields = fields_read.union(fields_written)
    read_only_fields = {field.name for field in fields_read - fields_written}

    for index_field in index_fields:
        index_field.field_type = FieldType.INDEXED
        assert FieldType.is_indexed(index_field)
        assert index_field.spatial_dimensions == 1, "Index fields have to be 1D"

    non_index_fields = [field for field in all_fields if field not in index_fields]
    distinct_dimensions = {field.spatial_dimensions for field in non_index_fields}
    assert len(distinct_dimensions) == 1, "Non-index fields do not have the same number of spatial coordinates"
    num_spatial_coordinates = list(distinct_dimensions)[0]

    def get_coordinate_symbol_assignment(name):
        # The first index field whose struct type contains `name` provides the coordinate.
        for candidate in index_fields:
            struct_type = candidate.dtype
            assert isinstance(struct_type, StructType), "Index fields have to have a struct data type"
            if struct_type.has_element(name):
                coordinate_symbol = TypedSymbol(name, BasicType(struct_type.get_element_type(name)))
                return SympyAssignment(coordinate_symbol, candidate[0](name))
        raise ValueError("Index %s not found in any of the passed index fields" % (name, ))

    coordinate_symbol_assignments = [
        get_coordinate_symbol_assignment(coordinate_name)
        for coordinate_name in coordinate_names[:num_spatial_coordinates]
    ]
    coordinate_typed_symbols = [assignment.lhs for assignment in coordinate_symbol_assignments]
    assignments = coordinate_symbol_assignments + assignments

    # make 1D loop over index fields
    loop_body = Block([])
    loop_node = LoopOverCoordinate(loop_body, coordinate_to_loop_over=0,
                                   start=0, stop=index_fields[0].shape[0])
    for assignment in assignments:
        loop_body.append(assignment)

    ast_node = KernelFunction(Block([loop_node]), "cpu", "c", make_python_function,
                              ghost_layers=None, function_name=function_name)

    # All non-index fields are accessed at the coordinates read from the index field.
    fixed_coordinate_mapping = {field.name: coordinate_typed_symbols for field in non_index_fields}

    resolve_field_accesses(ast_node, read_only_fields,
                           field_to_fixed_coordinates=fixed_coordinate_mapping)
    move_constants_before_loop(ast_node)
    return ast_node
def create_cuda_kernel(assignments, function_name="kernel", type_info=None, indexing_creator=BlockIndexing,
                       iteration_slice=None, ghost_layers=None, skip_independence_check=False):
    """Build the AST of a CUDA kernel from a list of update rules.

    Args:
        assignments: non-empty list of update rules
        function_name: name given to the created ``KernelFunction``
        type_info: forwarded to ``add_types``
        indexing_creator: callable invoked with ``field`` and ``iteration_slice``
            that returns the indexing object for the kernel
        iteration_slice: slices to iterate over; if None it is derived from ``ghost_layers``
        ghost_layers: either a single int or a sequence of (lower, upper) pairs per
            coordinate. Only used to derive the iteration slice when ``iteration_slice``
            is None; if both are None, the largest ``required_ghost_layers`` of all
            non-absolute field accesses is used in every direction.
        skip_independence_check: if True, ``add_types`` is called with the
            independence check disabled

    Returns:
        The ``KernelFunction`` node, with the indexing object attached as ``indexing``.
    """
    assert assignments, "Assignments must not be empty!"
    fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check)
    all_fields = fields_read.union(fields_written)
    read_only_fields = {field.name for field in fields_read - fields_written}

    # Buffer and custom fields are excluded when determining shape and indexing.
    buffers = {field for field in all_fields if FieldType.is_buffer(field) or FieldType.is_custom(field)}
    fields_without_buffers = all_fields - buffers

    field_accesses = set()
    # NOTE: tallied as before, but not read anywhere else in this function.
    num_buffer_accesses = 0
    for eq in assignments:
        accesses_in_eq = eq.atoms(Field.Access)
        # Absolute accesses do not contribute to the ghost layer requirement.
        field_accesses.update(access for access in accesses_in_eq if not access.is_absolute_access)
        num_buffer_accesses += sum(1 for access in accesses_in_eq if FieldType.is_buffer(access.field))

    common_shape = get_common_shape(fields_without_buffers)

    if iteration_slice is None:
        # determine iteration slice from ghost layers
        num_dims = len(common_shape)
        if ghost_layers is None:
            # determine required number of ghost layers from field access
            required_ghost_layers = max([access.required_ghost_layers for access in field_accesses])
            ghost_layers = [(required_ghost_layers, required_ghost_layers)] * num_dims

        if isinstance(ghost_layers, int):
            upper_bound = -ghost_layers if ghost_layers > 0 else None
            iteration_slice = [slice(ghost_layers, upper_bound) for _ in range(num_dims)]
            ghost_layers = [(ghost_layers, ghost_layers)] * num_dims
        else:
            iteration_slice = [
                slice(ghost_layers[dim][0], -ghost_layers[dim][1] if ghost_layers[dim][1] > 0 else None)
                for dim in range(num_dims)
            ]

    reference_field = list(fields_without_buffers)[0]
    indexing = indexing_creator(field=reference_field, iteration_slice=iteration_slice)
    indexing_coordinates = indexing.coordinates

    # Define the loop counter symbols in terms of the indexing coordinates.
    cell_idx_assignments = [
        SympyAssignment(LoopOverCoordinate.get_loop_counter_symbol(dim), coordinate)
        for dim, coordinate in enumerate(indexing_coordinates)
    ]
    cell_idx_symbols = [
        LoopOverCoordinate.get_loop_counter_symbol(dim) for dim, _ in enumerate(indexing_coordinates)
    ]
    assignments = cell_idx_assignments + assignments

    block = indexing.guard(Block(assignments), common_shape)
    unify_shape_symbols(block, common_shape=common_shape, fields=fields_without_buffers)

    kernel_ast = KernelFunction(block, Target.GPU, Backend.CUDA, make_python_function,
                                ghost_layers, function_name, assignments=assignments)
    kernel_ast.global_variables.update(indexing.index_variables)

    base_pointer_spec = [['spatialInner0']]
    base_pointer_info = {
        field.name: parse_base_pointer_info(base_pointer_spec, [2, 1, 0],
                                            field.spatial_dimensions, field.index_dimensions)
        for field in all_fields
    }
    fixed_coordinates = {field.name: cell_idx_symbols for field in all_fields}

    loop_strides = reference_field.shape

    if any(FieldType.is_buffer(field) for field in all_fields):
        base_buffer_index = get_base_buffer_index(kernel_ast, indexing.coordinates, loop_strides)
        resolve_buffer_accesses(kernel_ast, base_buffer_index, read_only_fields)

    resolve_field_accesses(kernel_ast, read_only_fields,
                           field_to_base_pointer_info=base_pointer_info,
                           field_to_fixed_coordinates=fixed_coordinates)

    # If loop counter symbols have been explicitly used in the update equations (e.g. for built in periodicity),
    # they are defined here
    undefined_loop_counters = {}
    for symbol in kernel_ast.body.undefined_symbols:
        counter_index = LoopOverCoordinate.is_loop_counter_symbol(symbol)
        if counter_index is not None:
            undefined_loop_counters[counter_index] = symbol
    for counter_index, loop_counter in undefined_loop_counters.items():
        kernel_ast.body.insert_front(SympyAssignment(loop_counter, indexing.coordinates[counter_index]))

    # add the function which determines #blocks and #threads as additional member to KernelFunction node
    # this is used by the jit
    kernel_ast.indexing = indexing
    return kernel_ast