def test_loop_over_coordinate():
    """Check body replacement and bound substitution on ``LoopOverCoordinate``."""
    assignments = [
        Assignment(dst[0, 0](0), s[0]),
        Assignment(x, dst[0, 0](2)),
    ]
    original_body = Block(assignments)
    loop = LoopOverCoordinate(original_body, coordinate_to_loop_over=0,
                              start=0, stop=10, step=1)
    assert loop.body == original_body

    # Swapping the body must leave the loop bounds untouched.
    reduced_body = Block(assignments[:1])
    loop = loop.new_loop_with_different_body(reduced_body)
    assert loop.body == reduced_body
    for attribute, expected in (("start", 0), ("stop", 10), ("step", 1)):
        assert getattr(loop, attribute) == expected

    # Each bound is replaced by substituting its current value.
    loop.replace(loop.start, 2)
    loop.replace(loop.stop, 20)
    loop.replace(loop.step, 2)
    for attribute, expected in (("start", 2), ("stop", 20), ("step", 2)):
        assert getattr(loop, attribute) == expected
def add(condition, dimensions, as_else_block=False): nonlocal last_conditional if staggered_field.index_dimensions == 1: assignments = [ Assignment(staggered_field(d), expressions[d]) for d in dimensions ] a_coll = AssignmentCollection(assignments, list(subexpressions)) a_coll = a_coll.new_filtered( [staggered_field(d) for d in dimensions]) elif staggered_field.index_dimensions == 2: assert staggered_field.has_fixed_index_shape assignments = [ Assignment(staggered_field(d, i), expr) for d in dimensions for i, expr in enumerate(expressions[d]) ] a_coll = AssignmentCollection(assignments, list(subexpressions)) a_coll = a_coll.new_filtered([ staggered_field(d, i) for i in range(staggered_field.index_shape[1]) for d in dimensions ]) sp_assignments = [ SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments ] if as_else_block and last_conditional: new_cond = Conditional(condition, Block(sp_assignments)) last_conditional.false_block = Block([new_cond]) last_conditional = new_cond else: last_conditional = Conditional(condition, Block(sp_assignments)) final_assignments.append(last_conditional)
def test_symbol_renaming():
    """Two loops assign different right-hand sides to the same symbol.

    When both assignments are hoisted in front of the loops, one of the
    symbols has to be renamed.
    """
    f, g = ps.fields("f, g : double[2D]")
    a, b, c = (TypedSymbol(name, np.float64) for name in ('a', 'b', 'c'))

    def make_loop(rhs_of_c):
        # Each loop defines ``c`` and then uses it in a field update.
        body = Block([
            SympyAssignment(c, rhs_of_c),
            SympyAssignment(g[0, 0], f[0, 0] + c),
        ])
        return LoopOverCoordinate(body, 0, 0, 10)

    block = Block([make_loop(a + b), make_loop(a**2 + b**2)])

    move_constants_before_loop(block)

    loops = block.atoms(LoopOverCoordinate)
    assert len(loops) == 2
    for loop in loops:
        parent_args = loop.parent.args
        assert len(loop.body.args) == 1
        assert len(parent_args) == 4  # 2 loops + 2 subexpressions
        assert parent_args[0].lhs.name != parent_args[1].lhs.name
def test_vec_any(instruction_set, dtype):
    """Run a vectorized kernel guarded by ``vec_any`` and check the rewritten cells."""
    scalable = instruction_set in ['sve', 'rvv']
    if scalable:
        width = 4  # we don't know the actual value
    else:
        width = get_vector_instruction_set(dtype, instruction_set)['width']

    np_dtype = np.float64 if dtype == 'double' else np.float32
    data_arr = np.zeros((4 * width, 4 * width), dtype=np_dtype)
    data_arr[3:9, 1:3 * width - 1] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)

    c = [
        ps.Assignment(sp.Symbol("t1"), vec_any(data.center() > 0.0)),
        Conditional(vec_any(data.center() > 0.0),
                    Block([ps.Assignment(data.center(), 2.0)])),
    ]
    vectorize_info = {'instruction_set': instruction_set}
    ast = ps.create_kernel(c, target=ps.Target.CPU,
                           cpu_vectorize_info=vectorize_info)
    kernel = ast.compile()
    kernel(data=data_arr)

    # For scalable vector sets we only know that the first value has changed,
    # so one column fewer is checked there.
    last_column = 3 * width - 1 if scalable else 3 * width
    np.testing.assert_equal(data_arr[3:9, :last_column], 2.0)
def test_jacobi_variable_field_size():
    """Compare a generated 3D Jacobi kernel on generic fields with a NumPy reference."""
    size = (3, 3, 3)
    f = Field.create_generic("f", 3)
    d = Field.create_generic("d", 3)

    neighbour_sum = f[1, 0, 0] + f[-1, 0, 0] + f[0, 1, 0] + f[0, -1, 0]
    jacobi = SympyAssignment(d[0, 0, 0], neighbour_sum / 4)
    loop_node, gl_info = make_loop_over_domain(Block([jacobi]))
    ast_node = KernelFunction(loop_node, 'cpu', 'c', make_python_function,
                              ghost_layers=gl_info)
    resolve_field_accesses(ast_node)
    move_constants_before_loop(ast_node)

    src_field_c = np.random.rand(*size)
    src_field_py = np.copy(src_field_c)
    dst_field_c = np.zeros(size)
    dst_field_py = np.zeros(size)

    # Reference on the interior cells; the additions are kept in the order
    # x-1, x+1, y-1, y+1.
    inner = slice(1, -1)
    dst_field_py[inner, inner, inner] = 0.25 * (
        src_field_py[:-2, inner, inner] + src_field_py[2:, inner, inner]
        + src_field_py[inner, :-2, inner] + src_field_py[inner, 2:, inner])

    kernel = ast_node.compile()
    kernel(f=src_field_c, d=dst_field_c)

    error = np.sum(np.abs(dst_field_py - dst_field_c))
    np.testing.assert_allclose(error, 0.0, atol=1e-13)
def test_headers_have_quotes_or_brackets():
    """``get_headers`` must reject header names lacking quotes or angle brackets."""

    class ErrorNode1(CustomCodeNode):
        def __init__(self):
            super().__init__("", [], [])
            self.headers = ["iostream"]

    class ErrorNode2(CustomCodeNode):
        headers = ["<iostream>", "foo"]

        def __init__(self):
            super().__init__("", [], [])
            self.headers = ["<iostream>", "foo"]

    class OkNode3(CustomCodeNode):
        def __init__(self):
            super().__init__("", [], [])
            self.headers = ["<iostream>", '"foo"']

    # The offending nodes are built lazily so construction still happens
    # inside the ``pytest.raises`` context.
    failing_roots = (
        lambda: Block([ErrorNode1()]),
        lambda: ErrorNode2(),
    )
    for make_root in failing_roots:
        with pytest.raises(AssertionError,
                           match='.* does not follow the pattern .*'):
            get_headers(make_root())

    get_headers(OkNode3())
def test_vec_all(instruction_set, dtype):
    """Run a vectorized kernel guarded by ``vec_all`` and check the rewritten cells."""
    scalable = instruction_set in ['sve', 'rvv']
    if scalable:
        # we don't know the actual value, need something guaranteed larger than vector
        width = 1000
    else:
        width = get_vector_instruction_set(dtype, instruction_set)['width']

    np_dtype = np.float64 if dtype == 'double' else np.float32
    data_arr = np.zeros((4 * width, 4 * width), dtype=np_dtype)
    data_arr[3:9, 1:3 * width - 1] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)

    c = [
        Conditional(vec_all(data.center() > 0.0),
                    Block([ps.Assignment(data.center(), 2.0)])),
    ]
    vectorize_info = {'instruction_set': instruction_set}
    ast = ps.create_kernel(c, target=Target.CPU,
                           cpu_vectorize_info=vectorize_info)
    kernel = ast.compile()
    kernel(data=data_arr)

    rows = data_arr[3:9]
    if scalable:
        # we only know that some values in the middle have been replaced
        assert np.all(rows[:, :2] <= 1.0)
        assert np.any(rows[:, 2:] == 2.0)
        return

    expected_by_columns = (
        (slice(None, 1), 0.0),
        (slice(1, width), 1.0),
        (slice(width, 2 * width), 2.0),
        (slice(2 * width, 3 * width - 1), 1.0),
        (slice(3 * width - 1, None), 0.0),
    )
    for columns, expected in expected_by_columns:
        np.testing.assert_equal(rows[:, columns], expected)
def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel", type_info=None,
                                coordinate_names=('x', 'y', 'z'), indexing_creator=BlockIndexing):
    """Build a CUDA ``KernelFunction`` that visits the cells listed in index fields.

    Args:
        assignments: update rules; typed via ``add_types`` without the
            independence check.
        index_fields: 1D fields with struct data type. Each one is marked as
            ``FieldType.INDEXED`` (the passed field objects are modified).
        function_name: name handed to the ``KernelFunction``.
        type_info: forwarded to ``add_types``.
        coordinate_names: struct element names holding the cell coordinates;
            only the first ``spatial_dimensions`` names are used.
        indexing_creator: callable creating the indexing object from ``field``
            and ``iteration_slice``.

    Returns:
        The kernel AST, with the indexing object attached as ``ast.indexing``.

    Raises:
        ValueError: if a coordinate name is not an element of any index field.
    """
    fields_read, fields_written, assignments = add_types(
        assignments, type_info, check_independence_condition=False)
    all_fields = fields_read.union(fields_written)
    read_only_fields = {fld.name for fld in fields_read - fields_written}

    for index_field in index_fields:
        index_field.field_type = FieldType.INDEXED
        assert FieldType.is_indexed(index_field)
        assert index_field.spatial_dimensions == 1, "Index fields have to be 1D"

    non_index_fields = [fld for fld in all_fields if fld not in index_fields]
    distinct_dims = {fld.spatial_dimensions for fld in non_index_fields}
    assert len(distinct_dims) == 1, "Non-index fields do not have the same number of spatial coordinates"
    num_spatial_dims = list(distinct_dims)[0]

    def get_coordinate_symbol_assignment(name):
        # Read the coordinate from the first index field whose struct has it.
        for candidate in index_fields:
            struct_type = candidate.dtype
            assert isinstance(struct_type, StructType), "Index fields have to have a struct data type"
            if not struct_type.has_element(name):
                continue
            value = candidate[0](name)
            return SympyAssignment(TypedSymbol(name, np.int64), value)
        raise ValueError(f"Index {name} not found in any of the passed index fields")

    coordinate_symbol_assignments = [
        get_coordinate_symbol_assignment(coord_name)
        for coord_name in coordinate_names[:num_spatial_dims]
    ]
    coordinate_typed_symbols = [asm.lhs for asm in coordinate_symbol_assignments]

    idx_field = list(index_fields)[0]
    full_slices = [slice(None)] * len(idx_field.spatial_shape)
    indexing = indexing_creator(field=idx_field, iteration_slice=full_slices)

    function_body = indexing.guard(
        Block(coordinate_symbol_assignments + assignments),
        get_common_shape(index_fields))
    ast = KernelFunction(function_body, Target.GPU, Backend.CUDA, make_python_function,
                         None, function_name, assignments=assignments)
    ast.global_variables.update(indexing.index_variables)

    thread_coordinates = indexing.coordinates
    base_pointer_spec = [['spatialInner0']]
    base_pointer_info = {
        fld.name: parse_base_pointer_info(base_pointer_spec, [2, 1, 0],
                                          fld.spatial_dimensions, fld.index_dimensions)
        for fld in all_fields
    }

    # Index fields are addressed by the indexing coordinates, all other
    # fields by the coordinates read from the index struct.
    coord_mapping = {fld.name: thread_coordinates for fld in index_fields}
    coord_mapping.update({fld.name: coordinate_typed_symbols for fld in non_index_fields})
    resolve_field_accesses(ast, read_only_fields,
                           field_to_fixed_coordinates=coord_mapping,
                           field_to_base_pointer_info=base_pointer_info)

    # add the function which determines #blocks and #threads as additional member to KernelFunction node
    # this is used by the jit
    ast.indexing = indexing
    return ast
def guard(self, kernel_content, arr_shape):
    """Wrap ``kernel_content`` in a bounds check on the indexing coordinates.

    Args:
        kernel_content: node to execute only inside the iteration region.
        arr_shape: array shape; only the first ``self._dim`` entries are used.

    Returns:
        A ``Block`` holding one ``Conditional`` whose condition requires every
        coordinate to be below the end value obtained from
        ``_get_end_from_slice``.
    """
    upper_bounds = _get_end_from_slice(self._iterationSlice, arr_shape[:self._dim])
    in_range = [coord < bound for coord, bound in zip(self.coordinates, upper_bounds)]

    combined = in_range[0]
    for term in in_range[1:]:
        combined = sp.And(combined, term)
    return Block([Conditional(combined, kernel_content)])
def test_vec_any():
    """AVX kernel guarded by ``vec_any``: check the block of columns that gets rewritten."""
    data_arr = np.zeros((15, 15))
    data_arr[3:9, 2:7] = 1.0
    data = ps.fields("data: double[2D]", data=data_arr)

    c = [
        ps.Assignment(sp.Symbol("t1"), vec_any(data.center() > 0.0)),
        Conditional(vec_any(data.center() > 0.0),
                    Block([ps.Assignment(data.center(), 2.0)])),
    ]
    vectorize_info = {'instruction_set': 'avx'}
    ast = ps.create_kernel(c, target='cpu', cpu_vectorize_info=vectorize_info)
    kernel = ast.compile()
    kernel(data=data_arr)

    np.testing.assert_equal(data_arr[3:9, 0:8], 2.0)
def test_block():
    """Check ``symbols_defined``, ``append`` and ``insert_front`` of ``Block``."""
    bl = Block([
        Assignment(dst[0, 0](0), s[0]),
        Assignment(x, dst[0, 0](2)),
    ])
    initial_symbols = {dst[0, 0](0), dst[0, 0](2), s[0], x}
    assert bl.symbols_defined == initial_symbols

    # ``append`` is given a list here.
    bl.append([Assignment(y, 10)])
    assert bl.symbols_defined == initial_symbols | {y}
    assert len(bl.args) == 3

    # ``insert_front`` is given a one-shot iterator here.
    bl.insert_front(iter([Assignment(s[1], 11)]))
    assert bl.args[0] == Assignment(s[1], 11)
def boundary_conditional(boundary, direction, streaming_pattern, prev_timestep, lb_method, output_field,
                         cse=False):
    """Create a ``Conditional`` applying a boundary rule on one border of the domain.

    Args:
        boundary: callable producing the boundary rule for one direction index.
        direction: border direction; selects the stencil directions to handle
            and the border condition.
        streaming_pattern: passed to ``BetweenTimestepsIndexing``.
        prev_timestep: passed to ``BetweenTimestepsIndexing``.
        lb_method: provides the stencil and the post-collision PDF symbols.
        output_field: field used for the indexing and the border condition.
        cse: if true, ``sympy_cse_on_assignment_list`` is run on the collected
            assignments.

    Returns:
        ``Conditional`` on the border condition, containing the boundary
        assignments as ``SympyAssignment`` nodes.
    """
    stencil = lb_method.stencil
    indexing = BetweenTimestepsIndexing(output_field, stencil, prev_timestep, streaming_pattern)
    f_out, f_in = indexing.proxy_fields
    inv_dir = indexing.inverse_dir_symbol

    collected = []
    for direction_idx in direction_indices_in_direction(direction, stencil):
        rule = boundary(f_out, f_in, direction_idx, inv_dir, lb_method, index_field=None)

        # rhs: replace f_out by post collision symbols.
        post_collision = {
            f_out(i): sym
            for i, sym in enumerate(lb_method.post_collision_pdf_symbols)
        }
        substituted = AssignmentCollection([rule]).new_with_substitutions(post_collision)
        resolved = indexing.substitute_proxies(substituted)
        collected.extend(resolved.new_without_subexpressions().main_assignments)

    border_cond = border_conditions(direction, output_field, ghost_layers=1)
    if cse:
        collected = sympy_cse_on_assignment_list(collected)
    typed = [SympyAssignment(asm.lhs, asm.rhs) for asm in collected]
    return Conditional(border_cond, Block(typed))
def test_vec_maskstore(instruction_set, dtype):
    """Conditional store in a vectorized kernel: only the zero border may be overwritten."""
    np_dtype = np.float64 if dtype == 'double' else np.float32
    data_arr = np.zeros((16, 16), dtype=np_dtype)
    data_arr[3:-3, 3:-3] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)

    c = [
        Conditional(data.center() < 1.0,
                    Block([ps.Assignment(data.center(), 2.0)])),
    ]
    vectorize_info = {'instruction_set': instruction_set}
    ast = ps.create_kernel(c, target=Target.CPU,
                           cpu_vectorize_info=vectorize_info)
    kernel = ast.compile()
    kernel(data=data_arr)

    # The three-cell border started at 0.0 and must now hold 2.0 ...
    border_regions = (np.s_[:3, :], np.s_[-3:, :], np.s_[:, :3], np.s_[:, -3:])
    for region in border_regions:
        np.testing.assert_equal(data_arr[region], 2.0)
    # ... while the interior keeps its initial value.
    np.testing.assert_equal(data_arr[3:-3, 3:-3], 1.0)
def test_wrapper_function():
    """Print wrapper-function code for CPU and GPU kernels with two kinds of wrapper body."""
    z, y, x = pystencils.fields("z, y, x: [100, 80]")

    forward_assignments = pystencils.AssignmentCollection(
        [pystencils.Assignment(z[0, 0], x[0, 0] * sp.log(x[0, 0] * y[0, 0]))], [])

    # First the destructuring-bindings body for both targets, then the plain
    # block body for both targets.
    body_builders = (
        lambda call: DestructuringBindingsForFieldClass(call),
        lambda call: Block([call]),
    )
    for build_body in body_builders:
        for target in ('cpu', 'gpu'):
            ast = pystencils.create_kernel(forward_assignments, target=target)
            kernel_call_ast = FunctionCall(ast)
            wrapper = WrapperFunction(build_body(kernel_call_ast))
            code = FrameworkIntegrationPrinter()(wrapper)
            print(code)
def test_jacobi_fixed_field_size():
    """Compare a generated 2D Jacobi kernel on fixed-size fields with a NumPy reference."""
    size = (30, 20)

    src_field_c = np.random.rand(*size)
    src_field_py = np.copy(src_field_c)
    dst_field_c = np.zeros(size)
    dst_field_py = np.zeros(size)

    f = Field.create_from_numpy_array("f", src_field_c)
    d = Field.create_from_numpy_array("d", dst_field_c)

    neighbour_sum = f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]
    jacobi = SympyAssignment(d[0, 0], neighbour_sum / 4)
    loop_node, gl_info = make_loop_over_domain(Block([jacobi]))
    ast_node = KernelFunction(loop_node, 'cpu', 'c', make_python_function,
                              ghost_layers=gl_info)
    resolve_field_accesses(ast_node)
    move_constants_before_loop(ast_node)

    # Reference on the interior cells; the additions are kept in the order
    # x-1, x+1, y-1, y+1.
    inner = slice(1, -1)
    dst_field_py[inner, inner] = 0.25 * (
        src_field_py[:-2, inner] + src_field_py[2:, inner]
        + src_field_py[inner, :-2] + src_field_py[inner, 2:])

    kernel = ast_node.compile()
    kernel(f=src_field_c, d=dst_field_c)

    error = np.sum(np.abs(dst_field_py - dst_field_c))
    np.testing.assert_allclose(error, 0.0, atol=1e-13)

    code_display = show_code(ast_node)
    assert 'for' in str(code_display)
    assert 'for' in code_display._repr_html_()
def create_cuda_kernel(assignments, function_name="kernel", type_info=None, indexing_creator=BlockIndexing,
                       iteration_slice=None, ghost_layers=None, skip_independence_check=False):
    """Build a CUDA ``KernelFunction`` from a list of assignments.

    Args:
        assignments: update rules; must not be empty.
        function_name: name handed to the ``KernelFunction``.
        type_info: forwarded to ``add_types``.
        indexing_creator: callable creating the indexing object from ``field``
            and ``iteration_slice``.
        iteration_slice: region to iterate over. If ``None`` it is derived
            from ``ghost_layers``.
        ghost_layers: only used to derive the iteration slice when
            ``iteration_slice`` is ``None``. ``None`` takes the maximum
            ``required_ghost_layers`` of all non-absolute field accesses, an
            ``int`` applies the same width on every side, otherwise a sequence
            of ``(lower, upper)`` pairs per dimension is expected.
        skip_independence_check: if true, ``add_types`` is called with the
            independence check disabled.

    Returns:
        The kernel AST, with the indexing object attached as ``ast.indexing``.
    """
    assert assignments, "Assignments must not be empty!"
    fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check)
    all_fields = fields_read.union(fields_written)
    read_only_fields = {f.name for f in fields_read - fields_written}

    buffers = {f for f in all_fields if FieldType.is_buffer(f) or FieldType.is_custom(f)}
    fields_without_buffers = all_fields - buffers

    # Collect relative accesses only. Each access is tested once as it is
    # added instead of re-filtering the whole set after every assignment.
    field_accesses = set()
    for eq in assignments:
        field_accesses.update(access for access in eq.atoms(Field.Access)
                              if not access.is_absolute_access)

    common_shape = get_common_shape(fields_without_buffers)
    num_dims = len(common_shape)

    if iteration_slice is None:
        # determine iteration slice from ghost layers
        if ghost_layers is None:
            # determine required number of ghost layers from field access
            required_ghost_layers = max([fa.required_ghost_layers for fa in field_accesses])
            ghost_layers = [(required_ghost_layers, required_ghost_layers)] * num_dims
        if isinstance(ghost_layers, int):
            iteration_slice = [slice(ghost_layers, -ghost_layers if ghost_layers > 0 else None)
                               for _ in range(num_dims)]
            ghost_layers = [(ghost_layers, ghost_layers)] * num_dims
        else:
            # A zero upper ghost layer must map to ``None``, since ``-0``
            # would produce an empty slice.
            iteration_slice = [slice(ghost_layers[i][0],
                                     -ghost_layers[i][1] if ghost_layers[i][1] > 0 else None)
                               for i in range(num_dims)]

    indexing = indexing_creator(field=list(fields_without_buffers)[0], iteration_slice=iteration_slice)
    coord_mapping = indexing.coordinates

    # Bind the loop counter symbols to the indexing coordinates.
    cell_idx_assignments = [SympyAssignment(LoopOverCoordinate.get_loop_counter_symbol(i), value)
                            for i, value in enumerate(coord_mapping)]
    cell_idx_symbols = [LoopOverCoordinate.get_loop_counter_symbol(i) for i, _ in enumerate(coord_mapping)]
    assignments = cell_idx_assignments + assignments

    block = Block(assignments)
    block = indexing.guard(block, common_shape)
    unify_shape_symbols(block, common_shape=common_shape, fields=fields_without_buffers)

    ast = KernelFunction(block, Target.GPU, Backend.CUDA, make_python_function, ghost_layers,
                         function_name, assignments=assignments)
    ast.global_variables.update(indexing.index_variables)

    base_pointer_spec = [['spatialInner0']]
    base_pointer_info = {f.name: parse_base_pointer_info(base_pointer_spec, [2, 1, 0],
                                                         f.spatial_dimensions, f.index_dimensions)
                         for f in all_fields}

    coord_mapping = {f.name: cell_idx_symbols for f in all_fields}

    loop_strides = list(fields_without_buffers)[0].shape

    if any(FieldType.is_buffer(f) for f in all_fields):
        resolve_buffer_accesses(ast, get_base_buffer_index(ast, indexing.coordinates, loop_strides),
                                read_only_fields)

    resolve_field_accesses(ast, read_only_fields, field_to_base_pointer_info=base_pointer_info,
                           field_to_fixed_coordinates=coord_mapping)

    # If loop counter symbols have been explicitly used in the update equations (e.g. for built in periodicity),
    # they are defined here
    undefined_loop_counters = {LoopOverCoordinate.is_loop_counter_symbol(s): s
                               for s in ast.body.undefined_symbols
                               if LoopOverCoordinate.is_loop_counter_symbol(s) is not None}
    for i, loop_counter in undefined_loop_counters.items():
        ast.body.insert_front(SympyAssignment(loop_counter, indexing.coordinates[i]))

    # add the function which determines #blocks and #threads as additional member to KernelFunction node
    # this is used by the jit
    ast.indexing = indexing
    return ast
def create_indexed_kernel(
        assignments: AssignmentOrAstNodeList,
        index_fields,
        function_name="kernel",
        type_info=None,
        coordinate_names=('x', 'y', 'z')) -> KernelFunction:
    """Create a kernel that updates only the cells listed in an index field.

    Similar to :func:`create_kernel`, but instead of sweeping over all cells
    of a field, the coordinates to visit are read from a separate index field:
    a one dimensional array with struct data type. This traversal can e.g. be
    used for boundary handling.

    The struct has to contain elements named 'x', 'y' and, for 3D fields, 'z';
    these names are configurable through ``coordinate_names``. The struct may
    contain further elements that can be read and written in the kernel, for
    example boundary parameters.

    Args:
        assignments: list of assignments
        index_fields: list of index fields, i.e. 1D fields with struct data type
        type_info: see documentation of :func:`create_kernel`
        function_name: see documentation of :func:`create_kernel`
        coordinate_names: name of the coordinate fields in the struct data type
    """
    fields_read, fields_written, assignments = add_types(
        assignments, type_info, check_independence_condition=False)
    all_fields = fields_read.union(fields_written)

    for index_field in index_fields:
        index_field.field_type = FieldType.INDEXED
        assert FieldType.is_indexed(index_field)
        assert index_field.spatial_dimensions == 1, "Index fields have to be 1D"

    non_index_fields = [fld for fld in all_fields if fld not in index_fields]
    distinct_dims = {fld.spatial_dimensions for fld in non_index_fields}
    assert len(distinct_dims) == 1, \
        "Non-index fields do not have the same number of spatial coordinates"
    num_spatial_dims = list(distinct_dims)[0]

    def get_coordinate_symbol_assignment(name):
        # Read the coordinate from the first index field whose struct has it,
        # typed like the struct element.
        for idx_field in index_fields:
            struct_type = idx_field.dtype
            assert isinstance(struct_type, StructType), \
                "Index fields have to have a struct data type"
            if not struct_type.has_element(name):
                continue
            value = idx_field[0](name)
            symbol = TypedSymbol(name, BasicType(struct_type.get_element_type(name)))
            return SympyAssignment(symbol, value)
        raise ValueError(
            "Index %s not found in any of the passed index fields" % (name, ))

    coordinate_symbol_assignments = [
        get_coordinate_symbol_assignment(coord_name)
        for coord_name in coordinate_names[:num_spatial_dims]
    ]
    coordinate_typed_symbols = [asm.lhs for asm in coordinate_symbol_assignments]
    assignments = coordinate_symbol_assignments + assignments

    # make 1D loop over index fields
    loop_body = Block([])
    loop_node = LoopOverCoordinate(loop_body,
                                   coordinate_to_loop_over=0,
                                   start=0,
                                   stop=index_fields[0].shape[0])
    for node in assignments:
        loop_body.append(node)

    ast_node = KernelFunction(Block([loop_node]), "cpu", "c", make_python_function,
                              ghost_layers=None, function_name=function_name)

    fixed_coordinate_mapping = {
        fld.name: coordinate_typed_symbols for fld in non_index_fields
    }
    read_only_fields = {fld.name for fld in fields_read - fields_written}
    resolve_field_accesses(ast_node, read_only_fields,
                           field_to_fixed_coordinates=fixed_coordinate_mapping)
    move_constants_before_loop(ast_node)
    return ast_node
def create_staggered_kernel(assignments, target: Target = Target.CPU, gpu_exclusive_conditions=False, **kwargs):
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    For a staggered field, the first index coordinate defines the location of the staggered value.
    Further index coordinates can be used to store vectors/tensors at each point.

    Args:
        assignments: a sequence of assignments or an AssignmentCollection.
                     Assignments to staggered field are processed specially, while subexpressions and assignments to
                     regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
                     used, but they all need to use the same stencil (i.e. the same number of staggered points) and
                     shape.
        target: 'CPU' or 'GPU'
        gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
                                  handled in an else branch.
        kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed

    Returns:
        AST, see `create_kernel`

    Raises:
        ValueError: if the staggered assignments do not all share one stencil or one shape.
    """
    # These options are determined here; if present they must carry their neutral value.
    for option, neutral in (('ghost_layers', None), ('iteration_slice', None), ('omp_single_loop', False)):
        if option in kwargs:
            assert kwargs[option] is neutral
            del kwargs[option]

    def writes_staggered(node):
        return (hasattr(node, 'lhs')
                and type(node.lhs) is Field.Access
                and FieldType.is_staggered(node.lhs.field))

    # Everything that is not a write to a staggered field is treated as a subexpression.
    if isinstance(assignments, AssignmentCollection):
        main = assignments.main_assignments
        subexpressions = assignments.subexpressions + [a for a in main if not writes_staggered(a)]
        assignments = [a for a in main if writes_staggered(a)]
    else:
        subexpressions = [a for a in assignments if not writes_staggered(a)]
        assignments = [a for a in assignments if writes_staggered(a)]

    if len({tuple(a.lhs.field.staggered_stencil) for a in assignments}) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same stencil")
    if len({a.lhs.field.shape for a in assignments}) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same shape")

    staggered_field = assignments[0].lhs.field
    stencil = staggered_field.staggered_stencil
    dim = staggered_field.spatial_dimensions
    shape = staggered_field.shape

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]

    final_assignments = []

    def exclusions_for(direction):
        # All elementary directions of this dimensionality, minus the inverses
        # of the elementary directions that make up ``direction``.
        remaining = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
        for elementary_direction in direction:
            remaining.remove(inverse_direction_string(elementary_direction))
        return remaining

    def typed_subexpressions():
        # Fresh SympyAssignment nodes on every call; nodes without an lhs are passed through.
        passthrough = [s for s in subexpressions if not hasattr(s, 'lhs')]
        converted = [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')]
        return passthrough + converted

    # find out whether any of the ghost layers is not needed
    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
    for stencil_direction in stencil:
        common_exclusions.intersection_update(exclusions_for(stencil_direction))

    ghost_layers = [[0, 0] for _ in range(dim)]
    for excluded in common_exclusions:
        for axis, component in enumerate(direction_string_to_offset(excluded)):
            if component == 1:
                ghost_layers[axis][1] = 1
            elif component == -1:
                ghost_layers[axis][0] = 1

    def condition(direction):
        """exclude those staggered points that correspond to fluxes between ghost cells"""
        conditions = []
        for excluded in exclusions_for(direction):
            if excluded in common_exclusions:
                continue
            for axis, component in enumerate(direction_string_to_offset(excluded)):
                if component == 1:
                    conditions.append(counters[axis] < shape[axis] - 1)
                elif component == -1:
                    conditions.append(counters[axis] > 0)
        return sp.And(*conditions)

    if gpu_exclusive_conditions:
        # Build a chain of conditionals, each nested as the else-branch of the next one.
        outer_assignment = None
        condition_of = {stencil_direction: condition(stencil_direction) for stencil_direction in stencil}
        for num_conditions in range(len(stencil)):
            for combination in itertools.combinations(condition_of.values(), num_conditions):
                for assignment in assignments:
                    stencil_direction = stencil[assignment.lhs.index[0]]
                    if condition_of[stencil_direction] not in combination:
                        continue
                    typed = SympyAssignment(assignment.lhs, assignment.rhs)
                    outer_assignment = Conditional(sp.And(*combination), Block([typed]), outer_assignment)

        inner_assignment = [SympyAssignment(a.lhs, a.rhs) for a in assignments]
        last_conditional = Conditional(sp.And(*[condition(d) for d in stencil]),
                                       Block(inner_assignment), outer_assignment)
        final_assignments = typed_subexpressions() + [last_conditional]

        if target == Target.CPU:
            from pystencils.cpu import create_kernel as create_kernel_cpu
            return create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, omp_single_loop=False,
                                     **kwargs)
        return create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)

    # One conditional per staggered assignment, each with its own copy of the subexpressions.
    for assignment in assignments:
        stencil_direction = stencil[assignment.lhs.index[0]]
        guarded = typed_subexpressions() + [SympyAssignment(assignment.lhs, assignment.rhs)]
        last_conditional = Conditional(condition(stencil_direction), Block(guarded))
        final_assignments.append(last_conditional)

    remove_start_conditional = any(gl[0] == 0 for gl in ghost_layers)

    def strip_conditionals(ast):
        return remove_conditionals_in_staggered_kernel(ast, remove_start_conditional)

    prepend_optimizations = [strip_conditionals, move_constants_before_loop]
    if 'cpu_prepend_optimizations' in kwargs:
        prepend_optimizations += kwargs['cpu_prepend_optimizations']
        del kwargs['cpu_prepend_optimizations']
    return create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, omp_single_loop=False,
                         cpu_prepend_optimizations=prepend_optimizations, **kwargs)