def test_symbol_renaming():
    """Hoisting clashing loop-local assignments must rename one of them.

    Two loops each assign a different right-hand side to the same symbol
    ``c``. Once both assignments have been pulled in front of their loops
    they share one block, so their left-hand sides must differ.
    """
    src, dst = ps.fields("f, g : double[2D]")
    a, b, c = (TypedSymbol(name, np.float64) for name in ('a', 'b', 'c'))

    def make_loop(rhs_of_c):
        # Loop over coordinate 0 in [0, 10) that first defines ``c`` and then uses it.
        body = Block([
            SympyAssignment(c, rhs_of_c),
            SympyAssignment(dst[0, 0], src[0, 0] + c),
        ])
        return LoopOverCoordinate(body, 0, 0, 10)

    first_loop = make_loop(a + b)
    second_loop = make_loop(a**2 + b**2)
    root = Block([first_loop, second_loop])

    move_constants_before_loop(root)

    found_loops = root.atoms(LoopOverCoordinate)
    assert len(found_loops) == 2
    for current in found_loops:
        assert len(current.body.args) == 1
        siblings = current.parent.args
        assert len(siblings) == 4  # 2 loops + 2 subexpressions
        assert siblings[0].lhs.name != siblings[1].lhs.name
def add(condition, dimensions, as_else_block=False): nonlocal last_conditional if staggered_field.index_dimensions == 1: assignments = [ Assignment(staggered_field(d), expressions[d]) for d in dimensions ] a_coll = AssignmentCollection(assignments, list(subexpressions)) a_coll = a_coll.new_filtered( [staggered_field(d) for d in dimensions]) elif staggered_field.index_dimensions == 2: assert staggered_field.has_fixed_index_shape assignments = [ Assignment(staggered_field(d, i), expr) for d in dimensions for i, expr in enumerate(expressions[d]) ] a_coll = AssignmentCollection(assignments, list(subexpressions)) a_coll = a_coll.new_filtered([ staggered_field(d, i) for i in range(staggered_field.index_shape[1]) for d in dimensions ]) sp_assignments = [ SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments ] if as_else_block and last_conditional: new_cond = Conditional(condition, Block(sp_assignments)) last_conditional.false_block = Block([new_cond]) last_conditional = new_cond else: last_conditional = Conditional(condition, Block(sp_assignments)) final_assignments.append(last_conditional)
def test_jacobi_variable_field_size():
    """Compare a generated Jacobi kernel on generic 3D fields with a NumPy reference."""
    size = (3, 3, 3)
    src = Field.create_generic("f", 3)
    dst = Field.create_generic("d", 3)

    neighbour_sum = src[1, 0, 0] + src[-1, 0, 0] + src[0, 1, 0] + src[0, -1, 0]
    update_rule = SympyAssignment(dst[0, 0, 0], neighbour_sum / 4)

    loop_node, gl_info = make_loop_over_domain(Block([update_rule]))
    ast_node = KernelFunction(loop_node, 'cpu', 'c', make_python_function,
                              ghost_layers=gl_info)
    resolve_field_accesses(ast_node)
    move_constants_before_loop(ast_node)

    src_field_c = np.random.rand(*size)
    src_field_py = np.copy(src_field_c)
    dst_field_c = np.zeros(size)
    dst_field_py = np.zeros(size)

    # Reference result on the interior cells; summation order matches the kernel-free
    # formulation x-1, x+1, y-1, y+1.
    inner = slice(1, -1)
    dst_field_py[inner, inner, inner] = 0.25 * (
        src_field_py[:-2, inner, inner] + src_field_py[2:, inner, inner]
        + src_field_py[inner, :-2, inner] + src_field_py[inner, 2:, inner])

    compiled = ast_node.compile()
    compiled(f=src_field_c, d=dst_field_c)

    total_deviation = np.sum(np.abs(dst_field_py - dst_field_c))
    np.testing.assert_allclose(total_deviation, 0.0, atol=1e-13)
def get_coordinate_symbol_assignment(name):
    """Create the assignment that loads coordinate ``name`` from an index field.

    The first index field whose struct data type has an element called
    ``name`` is used; the resulting symbol is typed as ``np.int64``.

    Raises:
        ValueError: if none of the index fields has an element ``name``
    """
    for index_field in index_fields:
        struct_type = index_field.dtype
        assert isinstance(struct_type, StructType), "Index fields have to have a struct data type"
        if not struct_type.has_element(name):
            continue
        loaded_value = index_field[0](name)
        coordinate_symbol = TypedSymbol(name, np.int64)
        return SympyAssignment(coordinate_symbol, loaded_value)
    raise ValueError(f"Index {name} not found in any of the passed index fields")
def get_coordinate_symbol_assignment(name):
    """Create the assignment that loads coordinate ``name`` from an index field.

    The first index field whose struct data type has an element called
    ``name`` is used; the resulting symbol gets the type of that struct
    element.

    Raises:
        ValueError: if none of the index fields has an element ``name``
    """
    for index_field in index_fields:
        struct_type = index_field.dtype
        assert isinstance(struct_type, StructType), "Index fields have to have a struct data type"
        if not struct_type.has_element(name):
            continue
        loaded_value = index_field[0](name)
        element_type = BasicType(struct_type.get_element_type(name))
        return SympyAssignment(TypedSymbol(name, element_type), loaded_value)
    raise ValueError("Index %s not found in any of the passed index fields" % (name, ))
def test_staggered_iteration_manual():
    """Apply loop cutting by hand and compare against the unoptimised kernel.

    The kernel with per-direction conditionals is run first to get a
    reference. Then both loops are cut, conditionals are simplified, and the
    transformed kernel must contain no ``Conditional`` and give the same
    result.
    """
    dim = 2
    grid = [5] * dim
    f_arr = np.arange(5**dim).reshape(grid)
    s_arr = np.ones(grid + [dim]) * 1234
    s_arr_ref = s_arr.copy()

    f = Field.create_from_numpy_array('f', f_arr)
    s = Field.create_from_numpy_array('s', s_arr, index_dimensions=1)

    counters = [LoopOverCoordinate.get_loop_counter_symbol(axis) for axis in range(dim)]
    conditions = [counters[axis] < f.shape[axis] - 1 for axis in range(dim)]

    def guarded_update(d):
        # staggered value d: difference of the sums over the planes at 0 and at -1
        difference = (sum(f[o] for o in offsets_in_plane(d, 0, dim))
                      - sum(f[o] for o in offsets_in_plane(d, -1, dim)))
        eq = SympyAssignment(s(d), difference)
        cond = sp.And(*[c for axis, c in enumerate(conditions) if axis != d])
        return Conditional(cond, eq)

    eqs = [guarded_update(d) for d in range(dim)]

    kernel_ast = create_kernel(eqs, ghost_layers=[(1, 0), (1, 0), (1, 0)])
    reference_func = make_python_function(kernel_ast)
    reference_func(f=f_arr, s=s_arr_ref)

    # the outer loop is looked up only after the inner one has been cut
    innermost_loops = [n for n in kernel_ast.atoms(ast.LoopOverCoordinate) if n.is_innermost_loop]
    cut_loop(innermost_loops[0], [4])
    outermost_loops = [n for n in kernel_ast.atoms(ast.LoopOverCoordinate) if n.is_outermost_loop]
    cut_loop(outermost_loops[0], [4])

    kernel_body = kernel_ast.body
    simplify_conditionals(kernel_body, loop_counter_simplification=True)
    cleanup_blocks(kernel_ast.body)
    move_constants_before_loop(kernel_ast.body)
    cleanup_blocks(kernel_ast.body)

    assert not kernel_ast.atoms(Conditional), "Loop cutting optimization did not work"

    func_optimized = make_python_function(kernel_ast)
    func_optimized(f=f_arr, s=s_arr)
    np.testing.assert_almost_equal(s_arr_ref, s_arr)
def test_staggered_iteration():
    """Compare ``create_staggered_kernel`` with a hand-written conditional kernel.

    Runs once with generic (variable-size) fields and once with fixed-size
    fields. In both cases the staggered kernel must contain no
    ``Conditional`` and give the same values as the manual kernel.
    """
    dim = 2
    grid = [5] * dim
    f_arr = np.arange(5**dim).reshape(grid).astype(np.float64)
    s_arr = np.ones(grid + [dim]) * 1234
    s_arr_ref = s_arr.copy()

    fields_fixed = (Field.create_from_numpy_array('f', f_arr),
                    Field.create_from_numpy_array('s', s_arr, index_dimensions=1))
    fields_var = (Field.create_generic('f', 2),
                  Field.create_generic('s', 2, index_dimensions=1))

    def staggered_difference(field, d):
        # difference of the sums over the planes at offset 0 and offset -1
        return (sum(field[o] for o in offsets_in_plane(d, 0, dim))
                - sum(field[o] for o in offsets_in_plane(d, -1, dim)))

    for f, s in [fields_var, fields_fixed]:
        # --- Manual
        counters = [LoopOverCoordinate.get_loop_counter_symbol(axis) for axis in range(dim)]
        conditions = [counters[axis] < f.shape[axis] - 1 for axis in range(dim)]
        eqs = []
        for d in range(dim):
            update = SympyAssignment(s(d), staggered_difference(f, d))
            guard = sp.And(*[c for axis, c in enumerate(conditions) if axis != d])
            eqs.append(Conditional(guard, update))
        func = create_kernel(eqs, ghost_layers=[(1, 0), (1, 0), (1, 0)]).compile()

        # --- Built-in optimized
        expressions = [staggered_difference(f, d) for d in range(dim)]
        func_optimized = create_staggered_kernel(s, expressions).compile()
        assert not func_optimized.ast.atoms(Conditional), "Loop cutting optimization did not work"

        func(f=f_arr, s=s_arr_ref)
        func_optimized(f=f_arr, s=s_arr)
        np.testing.assert_almost_equal(s_arr_ref, s_arr)
def boundary_conditional(boundary, direction, streaming_pattern, prev_timestep, lb_method, output_field, cse=False):
    """Build a ``Conditional`` that applies a boundary rule at one domain border.

    For every stencil index returned by ``direction_indices_in_direction``
    the boundary object is called to obtain its update rule. In that rule the
    outgoing proxy field is replaced by the post-collision PDF symbols of
    ``lb_method``, and the remaining proxies are resolved through
    ``BetweenTimestepsIndexing``.

    Args:
        boundary: callable invoked as
            ``boundary(f_out, f_in, direction_idx, inv_dir, lb_method, index_field=None)``
        direction: border direction, passed to ``direction_indices_in_direction``
            and ``border_conditions``
        streaming_pattern: forwarded to ``BetweenTimestepsIndexing``
        prev_timestep: forwarded to ``BetweenTimestepsIndexing``
        lb_method: provides ``stencil`` and ``post_collision_pdf_symbols``
        output_field: field handed to the indexing and to ``border_conditions``
        cse: if true, ``sympy_cse_on_assignment_list`` is run on the collected
            assignments

    Returns:
        ``Conditional`` whose condition comes from ``border_conditions`` (with
        one ghost layer) and whose body is a ``Block`` of the boundary
        assignments.
    """
    stencil = lb_method.stencil
    dir_indices = direction_indices_in_direction(direction, stencil)
    indexing = BetweenTimestepsIndexing(output_field, stencil, prev_timestep, streaming_pattern)
    f_out, f_in = indexing.proxy_fields
    inv_dir = indexing.inverse_dir_symbol

    # rhs: replace f_out by post collision symbols.
    # The mapping does not depend on the direction index, so it is built once
    # instead of once per loop iteration.
    rhs_substitutions = {
        f_out(i): sym
        for i, sym in enumerate(lb_method.post_collision_pdf_symbols)
    }

    assignments = []
    for direction_idx in dir_indices:
        rule = boundary(f_out, f_in, direction_idx, inv_dir, lb_method, index_field=None)
        rule = AssignmentCollection([rule]).new_with_substitutions(rhs_substitutions)
        rule = indexing.substitute_proxies(rule)
        ac = rule.new_without_subexpressions()
        assignments += ac.main_assignments

    border_cond = border_conditions(direction, output_field, ghost_layers=1)
    if cse:
        assignments = sympy_cse_on_assignment_list(assignments)
    assignments = [SympyAssignment(a.lhs, a.rhs) for a in assignments]
    return Conditional(border_cond, Block(assignments))
def equations_to_code(equations, variable_prefix="lm.", variables_without_prefix=(), dtype="double"):
    """Render assignments as C code, one statement per line.

    Args:
        equations: an ``AssignmentCollection`` (all of its assignments are
            used) or an iterable of assignments with ``lhs`` and ``rhs``
        variable_prefix: forwarded to ``field_and_symbol_substitute`` for the
            right-hand sides
        variables_without_prefix: names passed to
            ``field_and_symbol_substitute`` together with all left-hand side
            names of ``equations``
        dtype: data type name passed to ``type_expr`` for both sides

    Returns:
        The generated statements joined by newlines.
    """
    if isinstance(equations, AssignmentCollection):
        equations = equations.all_assignments
    # The equations are traversed twice (names first, then code generation);
    # materialise them so that one-shot iterables such as generators work too.
    equations = list(equations)
    variables_without_prefix = list(variables_without_prefix)

    c_backend = CBackend()
    left_hand_side_names = [e.lhs.name for e in equations]

    result = []
    for eq in equations:
        # a fresh list per call, so the helper never sees a list shared between equations
        unprefixed_names = variables_without_prefix + left_hand_side_names
        typed_rhs = type_expr(field_and_symbol_substitute(eq.rhs, variable_prefix, unprefixed_names),
                              dtype=dtype)
        assignment = SympyAssignment(type_expr(eq.lhs, dtype=dtype), typed_rhs)
        result.append(c_backend(assignment))
    return "\n".join(result)
def test_jacobi_fixed_field_size():
    """Compare a generated Jacobi kernel on fixed-size 2D fields with a NumPy reference.

    Also checks that the displayed code, as plain text and as HTML, contains
    a ``for`` loop.
    """
    size = (30, 20)
    src_field_c = np.random.rand(*size)
    src_field_py = np.copy(src_field_c)
    dst_field_c = np.zeros(size)
    dst_field_py = np.zeros(size)

    src = Field.create_from_numpy_array("f", src_field_c)
    dst = Field.create_from_numpy_array("d", dst_field_c)

    neighbour_sum = src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]
    update_rule = SympyAssignment(dst[0, 0], neighbour_sum / 4)

    loop_node, gl_info = make_loop_over_domain(Block([update_rule]))
    ast_node = KernelFunction(loop_node, 'cpu', 'c', make_python_function,
                              ghost_layers=gl_info)
    resolve_field_accesses(ast_node)
    move_constants_before_loop(ast_node)

    # Reference result on the interior cells, summed in the order x-1, x+1, y-1, y+1.
    inner = slice(1, -1)
    dst_field_py[inner, inner] = 0.25 * (
        src_field_py[:-2, inner] + src_field_py[2:, inner]
        + src_field_py[inner, :-2] + src_field_py[inner, 2:])

    compiled = ast_node.compile()
    compiled(f=src_field_c, d=dst_field_c)

    total_deviation = np.sum(np.abs(dst_field_py - dst_field_c))
    np.testing.assert_allclose(total_deviation, 0.0, atol=1e-13)

    code_display = show_code(ast_node)
    assert 'for' in str(code_display)
    assert 'for' in code_display._repr_html_()
def test_sympy_assignment(default_assignment_simplifications):
    """Check log handling in generated code and ``SympyAssignment.replace``.

    With the default simplifications enabled the generated code must use
    ``log1p``; without them it must not. ``log2`` must never appear. After
    that, both sides of the assignment are replaced in place.
    """
    rhs = sp.log(x + 3) / sp.log(2) + sp.log(x**2 + 1)
    assignment = SympyAssignment(dst[0, 0](0), rhs)

    config = ps.CreateKernelConfig(
        default_assignment_simplifications=default_assignment_simplifications)
    kernel = ps.create_kernel([assignment], config=config)
    code = ps.get_code_str(kernel)

    uses_log1p = 'log1p' in code
    if default_assignment_simplifications:
        assert uses_log1p
    else:
        # no optimisations will be applied so the optimised version of log will not be in the code
        assert not uses_log1p
    # constant term is directly evaluated
    assert 'log2' not in code

    new_lhs = dst[0, 0](1)
    new_rhs = sp.log(2)
    assignment.replace(assignment.lhs, new_lhs)
    assignment.replace(assignment.rhs, new_rhs)
    assert assignment.lhs == dst[0, 0](1)
    assert assignment.rhs == sp.log(2)
def create_staggered_kernel(assignments, target: Target = Target.CPU, gpu_exclusive_conditions=False, **kwargs):
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    For a staggered field, the first index coordinate defines the location of the staggered value.
    Further index coordinates can be used to store vectors/tensors at each point.

    Args:
        assignments: a sequence of assignments or an AssignmentCollection.
                     Assignments to staggered field are processed specially, while subexpressions and assignments to
                     regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
                     used, but they all need to use the same stencil (i.e. the same number of staggered points) and
                     shape.
        target: a `Target` value; `Target.CPU` is the default
        gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
                                  handled in an else branch.
        kwargs: passed directly to create_kernel. `ghost_layers` and `iteration_slice` may only be given as None and
                `omp_single_loop` only as False, because this function sets them itself. A given
                `cpu_prepend_optimizations` list is appended to the optimizations added here.

    Returns:
        AST, see `create_kernel`

    Raises:
        ValueError: if the staggered assignments do not all share one stencil, or do not all share one shape
    """
    # These options are determined below; only their neutral value is accepted and then dropped from kwargs.
    if 'ghost_layers' in kwargs:
        assert kwargs['ghost_layers'] is None
        del kwargs['ghost_layers']
    if 'iteration_slice' in kwargs:
        assert kwargs['iteration_slice'] is None
        del kwargs['iteration_slice']
    if 'omp_single_loop' in kwargs:
        assert kwargs['omp_single_loop'] is False
        del kwargs['omp_single_loop']

    # Split the input: `assignments` keeps only assignments whose lhs is an access to a staggered field,
    # everything else (including nodes without an lhs) is treated as a subexpression.
    if isinstance(assignments, AssignmentCollection):
        subexpressions = assignments.subexpressions + [
            a for a in assignments.main_assignments
            if not hasattr(a, 'lhs') or type(a.lhs) is not Field.Access
            or not FieldType.is_staggered(a.lhs.field)
        ]
        assignments = [
            a for a in assignments.main_assignments
            if hasattr(a, 'lhs') and type(a.lhs) is Field.Access
            and FieldType.is_staggered(a.lhs.field)
        ]
    else:
        subexpressions = [
            a for a in assignments
            if not hasattr(a, 'lhs') or type(a.lhs) is not Field.Access
            or not FieldType.is_staggered(a.lhs.field)
        ]
        assignments = [
            a for a in assignments
            if hasattr(a, 'lhs') and type(a.lhs) is Field.Access and
            FieldType.is_staggered(a.lhs.field)
        ]

    # Note: with no staggered assignment at all, the sets below are empty and the first check raises as well.
    if len(set([tuple(a.lhs.field.staggered_stencil)
                for a in assignments])) != 1:
        raise ValueError(
            "All assignments need to be made to staggered fields with the same stencil"
        )
    if len(set([a.lhs.field.shape for a in assignments])) != 1:
        raise ValueError(
            "All assignments need to be made to staggered fields with the same shape"
        )

    # All staggered fields agree on stencil and shape, so the first one serves as reference.
    staggered_field = assignments[0].lhs.field
    stencil = staggered_field.staggered_stencil
    dim = staggered_field.spatial_dimensions
    shape = staggered_field.shape

    counters = [
        LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)
    ]

    final_assignments = []

    # find out whether any of the ghost layers is not needed
    # A direction stays in `common_exclusions` only if no stencil entry removes it from its own exclusion set.
    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
    for direction in stencil:
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        common_exclusions.intersection_update(exclusions)
    # One [lower, upper] pair per spatial dimension; a side is set to 1 for each remaining common exclusion
    # whose offset points to that side.
    ghost_layers = [[0, 0] for d in range(dim)]
    for direction in common_exclusions:
        direction = direction_string_to_offset(direction)
        for d, s in enumerate(direction):
            if s == 1:
                ghost_layers[d][1] = 1
            elif s == -1:
                ghost_layers[d][0] = 1

    def condition(direction):
        """exclude those staggered points that correspond to fluxes between ghost cells"""
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])

        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        conditions = []
        for e in exclusions:
            # directions excluded for every stencil entry are already handled through `ghost_layers`
            if e in common_exclusions:
                continue
            offset = direction_string_to_offset(e)
            for i, o in enumerate(offset):
                if o == 1:
                    conditions.append(counters[i] < shape[i] - 1)
                elif o == -1:
                    conditions.append(counters[i] > 0)
        return sp.And(*conditions)

    if gpu_exclusive_conditions:
        # Build one chain of nested Conditionals: for every combination of per-direction conditions
        # (from few to many), each assignment whose own condition is part of the combination gets a
        # Conditional that receives the chain built so far as third argument
        # (presumably the else branch - see `Conditional`).
        outer_assignment = None
        conditions = {direction: condition(direction) for direction in stencil}
        for num_conditions in range(len(stencil)):
            for combination in itertools.combinations(conditions.values(), num_conditions):
                for assignment in assignments:
                    direction = stencil[assignment.lhs.index[0]]
                    if conditions[direction] in combination:
                        assignment = SympyAssignment(assignment.lhs, assignment.rhs)
                        outer_assignment = Conditional(sp.And(*combination), Block([assignment]), outer_assignment)

        # Innermost case: all conditions hold, so every staggered assignment is executed.
        inner_assignment = []
        for assignment in assignments:
            inner_assignment.append(
                SympyAssignment(assignment.lhs, assignment.rhs))
        last_conditional = Conditional(
            sp.And(*[condition(d) for d in stencil]),
            Block(inner_assignment), outer_assignment)
        # Nodes without an lhs are passed on unchanged, all other subexpressions become SympyAssignments.
        final_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                            [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                            [last_conditional]

        if target == Target.CPU:
            # the CPU kernel creation function is called directly here
            from pystencils.cpu import create_kernel as create_kernel_cpu
            ast = create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, omp_single_loop=False, **kwargs)
        else:
            ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
        return ast

    # Default path: every staggered assignment gets its own Conditional, whose block holds all
    # subexpressions followed by the assignment itself.
    for assignment in assignments:
        direction = stencil[assignment.lhs.index[0]]
        sp_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                         [SympyAssignment(assignment.lhs, assignment.rhs)]
        last_conditional = Conditional(condition(direction), Block(sp_assignments))
        final_assignments.append(last_conditional)

    # true if at least one dimension has no ghost layer on its lower side
    remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
    # optimizations handed to create_kernel: conditional removal first, then constant hoisting
    prepend_optimizations = [
        lambda ast: remove_conditionals_in_staggered_kernel(
            ast, remove_start_conditional), move_constants_before_loop
    ]
    if 'cpu_prepend_optimizations' in kwargs:
        # user supplied optimizations are appended after the two above
        prepend_optimizations += kwargs['cpu_prepend_optimizations']
        del kwargs['cpu_prepend_optimizations']
    ast = create_kernel(final_assignments,
                        ghost_layers=ghost_layers,
                        target=target,
                        omp_single_loop=False,
                        cpu_prepend_optimizations=prepend_optimizations,
                        **kwargs)
    return ast
def create_cuda_kernel(assignments, function_name="kernel", type_info=None, indexing_creator=BlockIndexing,
                       iteration_slice=None, ghost_layers=None, skip_independence_check=False):
    """Create the AST of a CUDA kernel from a list of assignments.

    Args:
        assignments: non-empty list of assignments; types are added via ``add_types``
        function_name: name passed to ``KernelFunction``
        type_info: forwarded to ``add_types``
        indexing_creator: called as ``indexing_creator(field=..., iteration_slice=...)``;
            the result supplies the coordinates, the guard around the kernel
            body and the index variables
        iteration_slice: region to iterate over; if None it is derived from ``ghost_layers``
        ghost_layers: only used when ``iteration_slice`` is None. Either an int
            (same number on every side) or a sequence of ``(lower, upper)``
            pairs indexed by dimension. If None, the maximum
            ``required_ghost_layers`` over all non-absolute field accesses is
            used on every side.
        skip_independence_check: if true, ``add_types`` is called without the independence check

    Returns:
        ``KernelFunction`` node with the indexing object stored in its ``indexing`` attribute.
    """
    assert assignments, "Assignments must not be empty!"
    fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check)
    all_fields = fields_read.union(fields_written)
    read_only_fields = set([f.name for f in fields_read - fields_written])

    buffers = set([f for f in all_fields if FieldType.is_buffer(f) or FieldType.is_custom(f)])
    fields_without_buffers = all_fields - buffers

    # Collect all field accesses first and drop the absolute ones once at the end.
    field_accesses = set()
    for eq in assignments:
        field_accesses.update(eq.atoms(Field.Access))
    field_accesses = {e for e in field_accesses if not e.is_absolute_access}

    common_shape = get_common_shape(fields_without_buffers)

    if iteration_slice is None:
        # determine iteration slice from ghost layers
        if ghost_layers is None:
            # determine required number of ghost layers from field access
            required_ghost_layers = max([fa.required_ghost_layers for fa in field_accesses])
            ghost_layers = [(required_ghost_layers, required_ghost_layers)] * len(common_shape)
        elif isinstance(ghost_layers, int):
            ghost_layers = [(ghost_layers, ghost_layers)] * len(common_shape)
        # Indexed by dimension on purpose: only the first len(common_shape) entries are read.
        # An upper ghost layer count of 0 means "up to the end", hence None instead of -0.
        iteration_slice = [
            slice(ghost_layers[i][0], -ghost_layers[i][1] if ghost_layers[i][1] > 0 else None)
            for i in range(len(common_shape))
        ]

    indexing = indexing_creator(field=list(fields_without_buffers)[0], iteration_slice=iteration_slice)
    coordinates = indexing.coordinates

    # every loop counter symbol is defined from the coordinate expression of the indexing scheme
    cell_idx_assignments = [SympyAssignment(LoopOverCoordinate.get_loop_counter_symbol(i), value)
                            for i, value in enumerate(coordinates)]
    cell_idx_symbols = [LoopOverCoordinate.get_loop_counter_symbol(i) for i, _ in enumerate(coordinates)]
    assignments = cell_idx_assignments + assignments

    block = Block(assignments)
    block = indexing.guard(block, common_shape)
    unify_shape_symbols(block, common_shape=common_shape, fields=fields_without_buffers)

    ast = KernelFunction(block, Target.GPU, Backend.CUDA, make_python_function, ghost_layers, function_name,
                         assignments=assignments)
    ast.global_variables.update(indexing.index_variables)

    base_pointer_spec = [['spatialInner0']]
    base_pointer_info = {f.name: parse_base_pointer_info(base_pointer_spec, [2, 1, 0],
                                                         f.spatial_dimensions, f.index_dimensions)
                         for f in all_fields}

    field_to_coordinates = {f.name: cell_idx_symbols for f in all_fields}

    loop_strides = list(fields_without_buffers)[0].shape

    if any(FieldType.is_buffer(f) for f in all_fields):
        resolve_buffer_accesses(ast, get_base_buffer_index(ast, indexing.coordinates, loop_strides),
                                read_only_fields)

    resolve_field_accesses(ast, read_only_fields, field_to_base_pointer_info=base_pointer_info,
                           field_to_fixed_coordinates=field_to_coordinates)

    # add the function which determines #blocks and #threads as additional member to KernelFunction node
    # this is used by the jit

    # If loop counter symbols have been explicitly used in the update equations (e.g. for built in periodicity),
    # they are defined here
    undefined_loop_counters = {LoopOverCoordinate.is_loop_counter_symbol(s): s
                               for s in ast.body.undefined_symbols
                               if LoopOverCoordinate.is_loop_counter_symbol(s) is not None}
    for i, loop_counter in undefined_loop_counters.items():
        ast.body.insert_front(SympyAssignment(loop_counter, indexing.coordinates[i]))

    ast.indexing = indexing
    return ast