def create_kernel(assignments, function_name="kernel", type_info=None, split_groups=(), iteration_slice=None,
                  ghost_layers=None):
    """Build the abstract syntax tree of a kernel function for the LLVM backend.

    The tree is produced by the CPU kernel creation routine from a list of update rules (loops are
    created according to the field accesses in the equations). Afterwards casts are inserted into its
    body and the node is tagged with the LLVM backend and its compile function.

    Args:
        assignments: list of sympy equations containing accesses to :class:`pystencils.field.Field`;
            they define the update rules of the kernel
        function_name: name of the generated function - only important if generated code is written out
        type_info: a map from symbol name to a C type specifier. If not specified, all symbols are assumed
            to be of type 'double', except symbols on the left hand side of equations whose right hand
            side is a sympy Boolean, which are assumed to be 'bool'.
        split_groups: specification on how to split up the inner loop into multiple loops. For details see
            transformation :func:`pystencils.transformation.split_inner_loop`
        iteration_slice: if not None, iteration is done only over this slice of the field
        ghost_layers: a sequence of pairs for each coordinate with lower and upper number of ghost layers;
            if None, the number of ghost layers is determined automatically and assumed to be equal for
            all dimensions

    Returns:
        :class:`pystencils.ast.KernelFunction` node
    """
    # Imported under an alias so the CPU routine does not shadow this function's own name locally.
    from pystencils.cpu import create_kernel as create_cpu_kernel

    kernel_ast = create_cpu_kernel(assignments, function_name, type_info, split_groups, iteration_slice,
                                   ghost_layers)
    kernel_ast.body = insert_casts(kernel_ast.body)
    kernel_ast._compile_function = make_python_function
    kernel_ast._backend = 'llvm'
    return kernel_ast
def test_staggered_iteration_manual():
    """Cut the loops of a conditional staggered kernel by hand and compare against the uncut kernel.

    The kernel is first compiled and run unmodified to obtain a reference result. Then the innermost
    and outermost loops are cut, conditionals are simplified, and the optimized kernel must contain no
    `Conditional` node while producing the same output.
    """
    dim = 2
    grid_shape = [5] * dim
    f_arr = np.arange(5**dim).reshape(grid_shape)
    s_arr = np.ones(grid_shape + [dim]) * 1234
    s_arr_ref = s_arr.copy()

    f = Field.create_from_numpy_array('f', f_arr)
    s = Field.create_from_numpy_array('s', s_arr, index_dimensions=1)

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]
    conditions = [counter < f.shape[i] - 1 for i, counter in enumerate(counters)]

    eqs = []
    for d in range(dim):
        upper_plane = sum(f[o] for o in offsets_in_plane(d, 0, dim))
        lower_plane = sum(f[o] for o in offsets_in_plane(d, -1, dim))
        update = SympyAssignment(s(d), upper_plane - lower_plane)
        # The update for direction d is guarded by the bound checks of all *other* coordinates.
        guard = sp.And(*[c for i, c in enumerate(conditions) if i != d])
        eqs.append(Conditional(guard, update))

    # NOTE(review): three ghost layer pairs are passed although dim == 2 - confirm this is intended.
    kernel_ast = create_kernel(eqs, ghost_layers=[(1, 0), (1, 0), (1, 0)])

    # Reference run on the untouched AST.
    reference_kernel = make_python_function(kernel_ast)
    reference_kernel(f=f_arr, s=s_arr_ref)

    # The outermost loop is looked up only after the innermost loop has been cut.
    innermost_loops = [n for n in kernel_ast.atoms(ast.LoopOverCoordinate) if n.is_innermost_loop]
    cut_loop(innermost_loops[0], [4])
    outermost_loops = [n for n in kernel_ast.atoms(ast.LoopOverCoordinate) if n.is_outermost_loop]
    cut_loop(outermost_loops[0], [4])

    simplify_conditionals(kernel_ast.body, loop_counter_simplification=True)
    cleanup_blocks(kernel_ast.body)
    move_constants_before_loop(kernel_ast.body)
    cleanup_blocks(kernel_ast.body)

    assert not kernel_ast.atoms(Conditional), "Loop cutting optimization did not work"

    optimized_kernel = make_python_function(kernel_ast)
    optimized_kernel(f=f_arr, s=s_arr)
    np.testing.assert_almost_equal(s_arr_ref, s_arr)
def test_staggered_iteration():
    """Compare a hand-written conditional staggered kernel with `create_staggered_kernel`.

    Runs once with generic (variable size) fields and once with fields created from the numpy arrays.
    The built-in kernel must contain no `Conditional` node and must yield the same result as the
    manually assembled one.
    """
    dim = 2
    grid_shape = [5] * dim
    f_arr = np.arange(5**dim).reshape(grid_shape).astype(np.float64)
    s_arr = np.ones(grid_shape + [dim]) * 1234
    s_arr_ref = s_arr.copy()

    fields_fixed = (Field.create_from_numpy_array('f', f_arr),
                    Field.create_from_numpy_array('s', s_arr, index_dimensions=1))
    fields_var = (Field.create_generic('f', 2),
                  Field.create_generic('s', 2, index_dimensions=1))

    def plane_difference(field, d):
        """Sum of `field` over the plane at offset 0 minus the sum over the plane at offset -1."""
        return (sum(field[o] for o in offsets_in_plane(d, 0, dim))
                - sum(field[o] for o in offsets_in_plane(d, -1, dim)))

    for f, s in [fields_var, fields_fixed]:
        # --- Manual
        counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]
        conditions = [counter < f.shape[i] - 1 for i, counter in enumerate(counters)]
        eqs = []
        for d in range(dim):
            update = SympyAssignment(s(d), plane_difference(f, d))
            guard = sp.And(*[c for i, c in enumerate(conditions) if i != d])
            eqs.append(Conditional(guard, update))
        # NOTE(review): three ghost layer pairs are passed although dim == 2 - confirm this is intended.
        func = create_kernel(eqs, ghost_layers=[(1, 0), (1, 0), (1, 0)]).compile()

        # --- Built-in optimized
        expressions = [plane_difference(f, d) for d in range(dim)]
        func_optimized = create_staggered_kernel(s, expressions).compile()
        assert not func_optimized.ast.atoms(Conditional), "Loop cutting optimization did not work"

        func(f=f_arr, s=s_arr_ref)
        func_optimized(f=f_arr, s=s_arr)
        np.testing.assert_almost_equal(s_arr_ref, s_arr)
def test_2d_5pt():
    """Analyse the reference 2D 5-point C kernel and the pystencils kernel and compare their cycles."""
    size = [30, 50, 3]

    # Reference: kerncraft analysis of the C source shipped with the tests.
    kernel_file_path = os.path.join(INPUT_FOLDER, "2d-5pt.c")
    with open(kernel_file_path) as kernel_file:
        reference_source = kernel_file.read()
    reference_kernel = kerncraft.kernel.KernelCode(reference_source, machine=None, filename=kernel_file_path)
    reference = analysis(reference_kernel)

    # Same stencil expressed with pystencils; both fields are views on one zero array.
    arr = np.zeros(size)
    a = Field.create_from_numpy_array('a', arr, index_dimensions=1)
    b = Field.create_from_numpy_array('b', arr, index_dimensions=1)

    s = sp.Symbol("s")
    rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
    update_rule = Assignment(b[0, 0], s * rhs)

    kernel_ast = create_kernel([update_rule])
    result = analysis(PyStencilsKerncraftKernel(kernel_ast))

    # zip stops at the shorter sequence, so only the common prefix of both results is compared.
    for expected, actual in zip(reference.results['cycles'], result.results['cycles']):
        assert expected == actual
def test_compilation():
    """Generate likwid code for the reference kernel and benchmark code for the pystencils kernel.

    Nothing is asserted; the test passes when both code generation steps run without raising. The
    generated pystencils benchmark is printed.
    """
    machine_file_path = os.path.join(INPUT_FOLDER, "default_machine_file.yaml")
    machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path)

    kernel_file_path = os.path.join(INPUT_FOLDER, "2d-5pt.c")
    with open(kernel_file_path) as kernel_file:
        reference_source = kernel_file.read()
    reference_kernel = kerncraft.kernel.KernelCode(reference_source, machine=machine,
                                                   filename=kernel_file_path)
    reference_kernel.as_code('likwid')

    size = [30, 50, 3]
    arr = np.zeros(size)
    a = Field.create_from_numpy_array('a', arr, index_dimensions=1)
    b = Field.create_from_numpy_array('b', arr, index_dimensions=1)

    s = sp.Symbol("s")
    rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
    update_rule = Assignment(b[0, 0], s * rhs)

    kernel_ast = create_kernel([update_rule])
    mine = generate_benchmark(kernel_ast, likwid=False)
    print(mine)
def test_3d_7pt_iaca():
    """Run the ECM analysis on the reference 3D 7-point kernel and the pystencils kernel; compare flops."""
    # Make sure you use the intel compiler
    size = [20, 200, 200]
    kernel_file_path = os.path.join(INPUT_FOLDER, "3d-7pt.c")
    machine_file_path = os.path.join(INPUT_FOLDER, "default_machine_file.yaml")
    machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path)

    with open(kernel_file_path) as kernel_file:
        reference_source = kernel_file.read()
    reference_kernel = kerncraft.kernel.KernelCode(reference_source, machine=machine,
                                                   filename=kernel_file_path)
    reference_kernel.set_constant('M', size[0])
    reference_kernel.set_constant('N', size[1])
    # Only 'M' and 'N' are set on the reference kernel, so the last two extents have to agree.
    assert size[1] == size[2]
    analysis(reference_kernel, model='ecm')

    arr = np.zeros(size)
    a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
    b = Field.create_from_numpy_array('b', arr, index_dimensions=0)

    s = sp.Symbol("s")
    rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
    update_rule = Assignment(b[0, 0, 0], s * rhs)

    kernel_ast = create_kernel([update_rule])
    k = PyStencilsKerncraftKernel(kernel_ast, machine)
    analysis(k, model='ecm')

    assert reference_kernel._flops == k._flops
def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu',
                            gpu_exclusive_conditions=False, **kwargs):
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    Args:
        staggered_field: field where the first index coordinate defines the location of the staggered value.
                         It can have 1 or 2 index coordinates; with two index coordinates a vector is stored
                         at every staggered location and `expressions` has to be a sequence of sequences.
                         E.g. ``f[0,0](0)`` is interpreted as value at the left cell boundary, ``f[1,0](0)``
                         the right cell boundary and ``f[0,0](1)`` the southern cell boundary etc.
        expressions: sequence of expressions of length dim, defining how the west, southern, (bottom) cell
                     boundary should be updated.
        subexpressions: optional sequence of Assignments, that define subexpressions used in the main
                        expressions
        target: 'cpu' or 'gpu'
        gpu_exclusive_conditions: if/else construct to have only one code block for each of 2**dim code paths
        kwargs: passed directly to create_kernel, iteration slice and ghost_layers parameters are not allowed.
                Truthy values of 'cpu_blocking', 'cpu_vectorize_info' and 'cpu_openmp' are taken out of
                kwargs and applied here after the conditionals were removed (only for target 'cpu').

    Returns:
        AST, see `create_kernel`
    """
    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
    assert staggered_field.index_dimensions in (1, 2), 'Staggered field must have one or two index dimensions'
    dim = staggered_field.spatial_dimensions

    loop_counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]
    upper_bound_checks = [counter < staggered_field.shape[i] - 1 for i, counter in enumerate(loop_counters)]
    assert len(expressions) == dim
    if staggered_field.index_dimensions == 2:
        assert all(len(sublist) == len(expressions[0]) for sublist in expressions), \
            "If staggered field has two index dimensions expressions has to be a sequence of sequences of all the " \
            "same length."

    def condition_without(d):
        """Conjunction of the upper bound checks of every coordinate except `d`."""
        return sp.And(*[check for i, check in enumerate(upper_bound_checks) if i != d])

    final_assignments = []
    last_conditional = None

    def add(condition, dimensions, as_else_block=False):
        """Wrap the updates of `dimensions` in a Conditional; chain it as else-branch if requested."""
        nonlocal last_conditional
        if staggered_field.index_dimensions == 1:
            assignments = [Assignment(staggered_field(d), expressions[d]) for d in dimensions]
            a_coll = AssignmentCollection(assignments, list(subexpressions))
            a_coll = a_coll.new_filtered([staggered_field(d) for d in dimensions])
        elif staggered_field.index_dimensions == 2:
            assert staggered_field.has_fixed_index_shape
            assignments = [Assignment(staggered_field(d, i), expr)
                           for d in dimensions
                           for i, expr in enumerate(expressions[d])]
            a_coll = AssignmentCollection(assignments, list(subexpressions))
            a_coll = a_coll.new_filtered([staggered_field(d, i)
                                          for i in range(staggered_field.index_shape[1])
                                          for d in dimensions])
        sp_assignments = [SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments]
        new_conditional = Conditional(condition, Block(sp_assignments))
        if as_else_block and last_conditional:
            # Nested into the previous conditional, hence not added to the top-level list.
            last_conditional.false_block = Block([new_conditional])
        else:
            final_assignments.append(new_conditional)
        last_conditional = new_conditional

    if target == 'cpu' or not gpu_exclusive_conditions:
        for d in range(dim):
            add(condition_without(d), [d])
    elif target == 'gpu':
        full_conditions = [condition_without(d) for d in range(dim)]
        for include in itertools.product([1, 0], repeat=dim):
            case_condition = sp.And(*[c if flag else sp.Not(c) for c, flag in zip(full_conditions, include)])
            included_dimensions = [i for i in range(dim) if include[i]]
            if included_dimensions:
                add(case_condition, included_dimensions, True)

    ghost_layers = [(1, 0)] * dim

    def take_if_set(key):
        """Return kwargs[key] (None if absent); the key is removed from kwargs only if the value is truthy."""
        value = kwargs.get(key, None)
        if value:
            del kwargs[key]
        return value

    blocking = take_if_set('cpu_blocking')
    cpu_vectorize_info = take_if_set('cpu_vectorize_info')
    openmp = take_if_set('cpu_openmp')

    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
    if target != 'cpu':
        return ast

    remove_conditionals_in_staggered_kernel(ast)
    move_constants_before_loop(ast)
    omp_collapse = loop_blocking(ast, blocking) if blocking else None
    if openmp:
        from pystencils.cpu import add_openmp
        add_openmp(ast, num_threads=openmp, collapse=omp_collapse, assume_single_outer_loop=False)
    if cpu_vectorize_info is True:
        vectorize(ast)
    elif isinstance(cpu_vectorize_info, dict):
        vectorize(ast, **cpu_vectorize_info)
    return ast
def create_kernel(assignments, target='cpu', data_type="double", iteration_slice=None, ghost_layers=None,
                  skip_independence_check=False,
                  cpu_openmp=False, cpu_vectorize_info=None, cpu_blocking=None,
                  gpu_indexing='block', gpu_indexing_params=MappingProxyType({})):
    """Creates abstract syntax tree (AST) of kernel, using a list of update equations.

    Args:
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
        target: 'cpu', 'llvm' or 'gpu'
        data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol
                   name to type
        iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer
                         part of the field is iterated over
        ghost_layers: if left to default, the number of necessary ghost layers is determined automatically.
                      A single integer specifies the ghost layer count at all borders, can also be a sequence
                      of pairs ``[(x_lower_gl, x_upper_gl), .... ]``
        skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                 periodicity kernel, that access the field outside the iteration bounds.
                                 Use with care! Not forwarded for target 'llvm'.
        cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
        cpu_vectorize_info: True to vectorize with default settings, or a dictionary that is forwarded as
                            keyword arguments to the vectorize function (see there for documentation).
                            Example: '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
        cpu_blocking: a tuple of block sizes or None if no blocking should be applied
        gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
        gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
                             e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'

    Returns:
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
        can be compiled with through its 'compile()' member

    Raises:
        ValueError: for an unknown target, or (target 'cpu') a truthy `cpu_vectorize_info` that is neither
                    True nor a dict

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> ast = ps.create_kernel(assignment, target='cpu', cpu_openmp=True)
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
    """
    # ---- Normalizing parameters
    split_groups = ()
    if isinstance(assignments, AssignmentCollection):
        hints = assignments.simplification_hints
        if 'split_groups' in hints:
            split_groups = hints['split_groups']
        assignments = assignments.all_assignments
    if isinstance(assignments, Assignment):
        assignments = [assignments]

    # ---- Creating ast
    if target == 'cpu':
        from pystencils.cpu import add_openmp, create_kernel as create_cpu_kernel
        ast = create_cpu_kernel(assignments, type_info=data_type, split_groups=split_groups,
                                iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                                skip_independence_check=skip_independence_check)
        omp_collapse = loop_blocking(ast, cpu_blocking) if cpu_blocking else None
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse)
        # Any falsy value (None, False, empty dict) means: no vectorization at all.
        if not cpu_vectorize_info:
            return ast
        if cpu_vectorize_info is True:
            vectorize(ast)
        elif isinstance(cpu_vectorize_info, dict):
            vectorize(ast, **cpu_vectorize_info)
        else:
            raise ValueError("Invalid value for cpu_vectorize_info")
        return ast

    if target == 'llvm':
        from pystencils.llvm import create_kernel as create_llvm_kernel
        return create_llvm_kernel(assignments, type_info=data_type, split_groups=split_groups,
                                  iteration_slice=iteration_slice, ghost_layers=ghost_layers)

    if target == 'gpu':
        from pystencils.gpucuda import create_cuda_kernel
        indexing_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
        return create_cuda_kernel(assignments, type_info=data_type,
                                  indexing_creator=indexing_creator,
                                  iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                                  skip_independence_check=skip_independence_check)

    raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))
def create_staggered_kernel(assignments, target: Target = Target.CPU, gpu_exclusive_conditions=False, **kwargs):
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    For a staggered field, the first index coordinate defines the location of the staggered value.
    Further index coordinates can be used to store vectors/tensors at each point.

    Args:
        assignments: a sequence of assignments or an AssignmentCollection.
                     Assignments to staggered field are processed specially, while subexpressions and assignments to
                     regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
                     used, but they all need to use the same stencil (i.e. the same number of staggered points) and
                     shape.
        target: `Target` of the kernel; `Target.CPU` gets a dedicated code path when `gpu_exclusive_conditions`
                is set, every other value is forwarded to `create_kernel`
        gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
                                  handled in an else branch.
        kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed.
                The keys 'ghost_layers', 'iteration_slice' and 'omp_single_loop' are tolerated only with the
                values None, None and False respectively and are removed before forwarding.
                'cpu_prepend_optimizations' is appended to the optimizations this function installs itself
                (only on the path without `gpu_exclusive_conditions`).

    Returns:
        AST, see `create_kernel`

    Raises:
        ValueError: if the staggered assignments do not all share one stencil or one shape; this also
                    triggers when no assignment to a staggered field is present at all.
    """
    # These settings are determined below; only their neutral values are accepted from the caller.
    if 'ghost_layers' in kwargs:
        assert kwargs['ghost_layers'] is None
        del kwargs['ghost_layers']
    if 'iteration_slice' in kwargs:
        assert kwargs['iteration_slice'] is None
        del kwargs['iteration_slice']
    if 'omp_single_loop' in kwargs:
        assert kwargs['omp_single_loop'] is False
        del kwargs['omp_single_loop']

    # Split the input: writes to staggered fields stay in `assignments`, everything else (including
    # nodes without an `lhs`) is treated as subexpression.
    if isinstance(assignments, AssignmentCollection):
        subexpressions = assignments.subexpressions + [
            a for a in assignments.main_assignments
            if not hasattr(a, 'lhs') or type(a.lhs) is not Field.Access or not FieldType.is_staggered(a.lhs.field)
        ]
        assignments = [
            a for a in assignments.main_assignments
            if hasattr(a, 'lhs') and type(a.lhs) is Field.Access and FieldType.is_staggered(a.lhs.field)
        ]
    else:
        subexpressions = [
            a for a in assignments
            if not hasattr(a, 'lhs') or type(a.lhs) is not Field.Access or not FieldType.is_staggered(a.lhs.field)
        ]
        assignments = [
            a for a in assignments
            if hasattr(a, 'lhs') and type(a.lhs) is Field.Access and FieldType.is_staggered(a.lhs.field)
        ]
    # An empty `assignments` list yields an empty set here and therefore also raises.
    if len(set([tuple(a.lhs.field.staggered_stencil) for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same stencil")
    if len(set([a.lhs.field.shape for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same shape")

    # All staggered fields agree in stencil and shape, so the first one is representative.
    staggered_field = assignments[0].lhs.field
    stencil = staggered_field.staggered_stencil
    dim = staggered_field.spatial_dimensions
    shape = staggered_field.shape

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]

    final_assignments = []

    # find out whether any of the ghost layers is not needed
    # `common_exclusions` ends up holding the elementary directions whose inverse occurs in no
    # stencil direction; a ghost layer is requested on the corresponding side of each coordinate.
    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
    for direction in stencil:
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        common_exclusions.intersection_update(exclusions)
    ghost_layers = [[0, 0] for d in range(dim)]
    for direction in common_exclusions:
        direction = direction_string_to_offset(direction)
        for d, s in enumerate(direction):
            if s == 1:
                ghost_layers[d][1] = 1
            elif s == -1:
                ghost_layers[d][0] = 1

    def condition(direction):
        """exclude those staggered points that correspond to fluxes between ghost cells

        Returns the conjunction of loop counter bound checks for all excluded elementary directions of
        `direction` that are not already covered by a ghost layer (i.e. not in `common_exclusions`).
        """
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])

        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        conditions = []
        for e in exclusions:
            if e in common_exclusions:
                continue
            offset = direction_string_to_offset(e)
            for i, o in enumerate(offset):
                if o == 1:
                    conditions.append(counters[i] < shape[i] - 1)
                elif o == -1:
                    conditions.append(counters[i] > 0)
        return sp.And(*conditions)

    if gpu_exclusive_conditions:
        # Build a chain of nested conditionals: each new Conditional takes the chain built so far
        # as its third argument.
        outer_assignment = None
        conditions = {direction: condition(direction) for direction in stencil}
        for num_conditions in range(len(stencil)):
            for combination in itertools.combinations(conditions.values(), num_conditions):
                for assignment in assignments:
                    direction = stencil[assignment.lhs.index[0]]
                    if conditions[direction] in combination:
                        assignment = SympyAssignment(assignment.lhs, assignment.rhs)
                        outer_assignment = Conditional(sp.And(*combination), Block([assignment]), outer_assignment)

        # Innermost case: all conditions hold, so every staggered assignment is executed.
        inner_assignment = []
        for assignment in assignments:
            inner_assignment.append(SympyAssignment(assignment.lhs, assignment.rhs))
        last_conditional = Conditional(sp.And(*[condition(d) for d in stencil]),
                                       Block(inner_assignment), outer_assignment)
        # Subexpressions precede the conditional; those with an `lhs` are converted to SympyAssignment.
        final_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                            [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                            [last_conditional]

        if target == Target.CPU:
            from pystencils.cpu import create_kernel as create_kernel_cpu
            ast = create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, omp_single_loop=False, **kwargs)
        else:
            ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
        return ast

    # Default path: one Conditional per staggered assignment, each with its own freshly created copy
    # of the subexpressions.
    for assignment in assignments:
        direction = stencil[assignment.lhs.index[0]]
        sp_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                         [SympyAssignment(assignment.lhs, assignment.rhs)]
        last_conditional = Conditional(condition(direction), Block(sp_assignments))
        final_assignments.append(last_conditional)

    # True when at least one coordinate has no lower ghost layer.
    remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
    prepend_optimizations = [lambda ast: remove_conditionals_in_staggered_kernel(ast, remove_start_conditional),
                             move_constants_before_loop]
    if 'cpu_prepend_optimizations' in kwargs:
        # Caller-supplied optimizations are placed after the two installed above.
        prepend_optimizations += kwargs['cpu_prepend_optimizations']
        del kwargs['cpu_prepend_optimizations']
    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, omp_single_loop=False,
                        cpu_prepend_optimizations=prepend_optimizations, **kwargs)
    return ast
def create_domain_kernel(assignments: List[Assignment], *, config: CreateKernelConfig):
    """Creates abstract syntax tree (AST) of kernel, using a list of update equations.

    Args:
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
        config: CreateKernelConfig which includes the needed configuration

    Returns:
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
        can be compiled with through its 'compile()' member

    Raises:
        ValueError: if `config.cpu_vectorize_info` is truthy but neither True nor a dict, or if blocking
                    together with OpenMP is combined with nontemporal stores on an instruction set that
                    provides 'cachelineZero'
        NotImplementedError: if no AST was created for the combination of `config.target` and
                             `config.backend`

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> kernel_config = ps.CreateKernelConfig(cpu_openmp=True)
        >>> kernel_ast = ps.kernelcreation.create_domain_kernel([assignment], config=kernel_config)
        >>> kernel = kernel_ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
    """
    # --- applying first default simplifications
    # Best effort: a failing simplification only produces a warning, the assignments are used as they are.
    try:
        if config.default_assignment_simplifications and isinstance(assignments, AssignmentCollection):
            assignments = create_simplification_strategy()(assignments)
    except Exception as e:
        warnings.warn(f"It was not possible to apply the default pystencils optimisations to the "
                      f"AssignmentCollection due to the following problem :{e}")

    # ---- Normalizing parameters
    split_groups = ()
    if isinstance(assignments, AssignmentCollection):
        hints = assignments.simplification_hints
        if 'split_groups' in hints:
            split_groups = hints['split_groups']
        assignments = assignments.all_assignments

    try:
        if config.default_assignment_simplifications:
            assignments = apply_sympy_optimisations(assignments)
    except Exception as e:
        warnings.warn(f"It was not possible to apply the default SymPy optimisations to the "
                      f"Assignments due to the following problem :{e}")

    # ---- Creating ast
    kernel_ast = None
    if config.target == Target.CPU and config.backend == Backend.C:
        from pystencils.cpu import add_openmp, create_kernel
        kernel_ast = create_kernel(assignments, function_name=config.function_name, type_info=config.data_type,
                                   split_groups=split_groups,
                                   iteration_slice=config.iteration_slice, ghost_layers=config.ghost_layers,
                                   skip_independence_check=config.skip_independence_check)
        for optimization in config.cpu_prepend_optimizations:
            optimization(kernel_ast)

        omp_collapse = loop_blocking(kernel_ast, config.cpu_blocking) if config.cpu_blocking else None
        if config.cpu_openmp:
            add_openmp(kernel_ast, num_threads=config.cpu_openmp, collapse=omp_collapse,
                       assume_single_outer_loop=config.omp_single_loop)

        vectorize_info = config.cpu_vectorize_info
        if vectorize_info:
            if vectorize_info is True:
                vectorize(kernel_ast)
            elif isinstance(vectorize_info, dict):
                vectorize(kernel_ast, **vectorize_info)
                # This condition is stricter than it needs to be: if blocks along the fastest axis start on a
                # cache line boundary, it's okay. But we cannot determine that here.
                # We don't need to disallow OpenMP collapsing because it is never applied to the inner loop.
                blocked_cacheline_zeroing = (config.cpu_openmp and config.cpu_blocking
                                             and 'nontemporal' in vectorize_info
                                             and vectorize_info['nontemporal']
                                             and 'cachelineZero' in kernel_ast.instruction_set)
                if blocked_cacheline_zeroing:
                    raise ValueError("Blocking cannot be combined with cacheline-zeroing")
            else:
                raise ValueError("Invalid value for cpu_vectorize_info")
    elif config.target == Target.GPU and config.backend == Backend.CUDA:
        from pystencils.gpucuda import create_cuda_kernel
        indexing_creator = indexing_creator_from_params(config.gpu_indexing, config.gpu_indexing_params)
        kernel_ast = create_cuda_kernel(assignments, function_name=config.function_name,
                                        type_info=config.data_type,
                                        indexing_creator=indexing_creator,
                                        iteration_slice=config.iteration_slice,
                                        ghost_layers=config.ghost_layers,
                                        skip_independence_check=config.skip_independence_check)

    if not kernel_ast:
        raise NotImplementedError(
            f'{config.target} together with {config.backend} is not supported by `create_domain_kernel`')

    if config.use_auto_for_assignments:
        for a in kernel_ast.atoms(SympyAssignment):
            a.use_auto = True

    return kernel_ast
def compile_macroscopic_values_getter(lb_method, output_quantities, pdf_arr=None,
                                      ghost_layers=1, iteration_slice=None,
                                      field_layout='numpy', target=Target.CPU,
                                      streaming_pattern='pull', previous_timestep=Timestep.BOTH):
    """Create kernel to compute macroscopic value(s) from a pdf field (e.g. density or velocity)

    Args:
        lb_method: instance of :class:`lbmpy.methods.AbstractLbMethod`
        output_quantities: sequence of quantities to compute e.g. ['density', 'velocity']; a single
                           quantity that is neither list nor tuple is wrapped into a list
        pdf_arr: optional numpy array for pdf field - used to get optimal loop structure for kernel
        ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
                      that should be excluded from the iteration. If None, the number of ghost layers
                      is determined automatically and assumed to be equal for all dimensions.
        iteration_slice: if not None, iteration is done only over this slice of the field
        field_layout: layout for output field, also used for pdf field if pdf_arr is not given
        target: `Target.CPU` or `Target.GPU`
        streaming_pattern: passed together with `previous_timestep` to `get_accessor`; the write
                           accesses of the resulting accessor are the pdf symbols the kernel reads
        previous_timestep: see `streaming_pattern`

    Returns:
        a function to compute macroscopic values:
            - pdf_array
            - keyword arguments from name of conserved quantity (as in output_quantities) to numpy field

    Raises:
        ValueError: for quantities unknown to the method's conserved quantity computation or an unknown
                    target; the returned function raises it when an output field is missing
    """
    if not isinstance(output_quantities, (list, tuple)):
        output_quantities = [output_quantities]

    cqc = lb_method.conserved_quantity_computation
    unknown_quantities = [oq for oq in output_quantities if oq not in cqc.conserved_quantities]
    if unknown_quantities:
        raise ValueError("No such conserved quantity: %s, conserved quantities are %s" %
                         (str(unknown_quantities), str(cqc.conserved_quantities.keys())))

    if pdf_arr is not None:
        pdf_field = Field.create_from_numpy_array('pdfs', pdf_arr, index_dimensions=1)
    else:
        pdf_field = Field.create_generic('pdfs', lb_method.dim, index_dimensions=1, layout=field_layout)

    output_mapping = {}
    for output_quantity in output_quantities:
        number_of_elements = cqc.conserved_quantities[output_quantity]
        assert number_of_elements >= 1

        # Scalars get no index dimension, everything else exactly one.
        ind_dims = 1 if number_of_elements > 1 else 0
        if pdf_arr is not None:
            # Fixed-size output field derived from the pdf array: same spatial shape and dtype.
            output_field_shape = pdf_arr.shape[:-1]
            if ind_dims > 0:
                output_field_shape += (number_of_elements,)
                array_layout = get_layout_of_array(pdf_arr)
            else:
                array_layout = get_layout_of_array(pdf_arr, index_dimension_ids=[len(pdf_field.shape) - 1])
            output_field = Field.create_fixed_size(output_quantity, output_field_shape, ind_dims,
                                                   pdf_arr.dtype, array_layout)
        else:
            output_field = Field.create_generic(output_quantity, lb_method.dim, layout=field_layout,
                                                index_dimensions=ind_dims)

        accesses = [output_field(i) for i in range(number_of_elements)]
        # A single-component quantity is mapped to the access itself instead of a one-element list.
        output_mapping[output_quantity] = accesses[0] if len(accesses) == 1 else accesses

    stencil = lb_method.stencil
    previous_step_accessor = get_accessor(streaming_pattern, previous_timestep)
    pdf_symbols = previous_step_accessor.write(pdf_field, stencil)

    eqs = cqc.output_equations_from_pdfs(pdf_symbols, output_mapping).all_assignments

    if target == Target.CPU:
        import pystencils.cpu as cpu
        kernel_ast = cpu.create_kernel(eqs, ghost_layers=ghost_layers, iteration_slice=iteration_slice)
        kernel = cpu.make_python_function(kernel_ast)
    elif target == Target.GPU:
        import pystencils.gpucuda as gpu
        kernel_ast = gpu.create_cuda_kernel(eqs, ghost_layers=ghost_layers, iteration_slice=iteration_slice)
        kernel = gpu.make_python_function(kernel_ast)
    else:
        raise ValueError("Unknown target '%s'. Possible targets are `Target.CPU` and `Target.GPU`" % (target,))

    def getter(pdfs, **kwargs):
        """Run the compiled kernel on `pdfs`, writing into the output arrays given as keyword arguments."""
        if pdf_arr is not None:
            assert pdfs.shape == pdf_arr.shape and pdfs.strides == pdf_arr.strides, \
                "Pdf array not matching blueprint which was used to compile" + str(pdfs.shape) + str(pdf_arr.shape)
        if not set(output_quantities).issubset(kwargs.keys()):
            raise ValueError("You have to specify the output field for each of the following quantities: %s"
                             % (str(output_quantities),))
        kernel(pdfs=pdfs, **kwargs)

    return getter
def compile_macroscopic_values_setter(lb_method, quantities_to_set, pdf_arr=None,
                                      ghost_layers=1, iteration_slice=None,
                                      field_layout='numpy', target=Target.CPU,
                                      streaming_pattern='pull', previous_timestep=Timestep.BOTH):
    """Creates a function that sets a pdf field to specified macroscopic quantities

    The returned function can be called with the pdf field to set as single argument

    Args:
        lb_method: instance of :class:`lbmpy.methods.AbstractLbMethod`
        quantities_to_set: map from conserved quantity name to fixed value or numpy array; values with a
                           `shape` attribute are turned into fields and bound to the kernel as fixed
                           parameters
        pdf_arr: optional numpy array for pdf field - used to get optimal loop structure for kernel
        ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
                      that should be excluded from the iteration. If None, the number of ghost layers
                      is determined automatically and assumed to be equal for all dimensions.
                      NOTE(review): this argument is not forwarded to the kernel creation call below.
        iteration_slice: if not None, iteration is done only over this slice of the field
                         NOTE(review): this argument is not forwarded to the kernel creation call below.
        field_layout: layout of the pdf field if pdf_arr was not given
        target: `Target.CPU` or `Target.GPU`
        streaming_pattern: passed together with `previous_timestep` to `get_accessor`; the write
                           accesses of the resulting accessor are the pdf entries the kernel sets
        previous_timestep: see `streaming_pattern`

    Returns:
        function taking pdf array as single argument and which sets the field to the given values

    Raises:
        ValueError: for an unknown target
    """
    if pdf_arr is None:
        pdf_field = Field.create_generic('pdfs', lb_method.dim, index_dimensions=1, layout=field_layout)
    else:
        pdf_field = Field.create_from_numpy_array('pdfs', pdf_arr, index_dimensions=1)

    cqc = lb_method.conserved_quantity_computation

    fixed_kernel_parameters = {}
    value_map = {}
    at_least_one_field_input = False
    for quantity_name, value in quantities_to_set.items():
        if hasattr(value, 'shape'):
            # Array input: becomes a field that is read inside the kernel.
            fixed_kernel_parameters[quantity_name] = value
            at_least_one_field_input = True
            num_components = cqc.conserved_quantities[quantity_name]
            index_dims = 1 if num_components > 1 else 0
            field = Field.create_from_numpy_array(quantity_name, value, index_dimensions=index_dims)
            if num_components == 1:
                value = field(0)
            else:
                value = [field(i) for i in range(num_components)]
        value_map[quantity_name] = value

    cq_equations = cqc.equilibrium_input_equations_from_init_values(**value_map, force_substitution=False)

    eq = lb_method.get_equilibrium(conserved_quantity_equations=cq_equations)
    if at_least_one_field_input:
        eq = create_simplification_strategy(lb_method)(eq)
    else:
        eq = eq.new_without_subexpressions()

    previous_step_accessor = get_accessor(streaming_pattern, previous_timestep)
    write_accesses = previous_step_accessor.write(pdf_field, lb_method.stencil)

    # Replace the method's post-collision pdf symbols by the field accesses that are written.
    substitutions = {sym: write_accesses[i] for i, sym in enumerate(lb_method.post_collision_pdf_symbols)}
    eq = eq.new_with_substitutions(substitutions).all_assignments

    if target == Target.CPU:
        import pystencils.cpu as cpu
        compiled_kernel = cpu.make_python_function(cpu.create_kernel(eq))
    elif target == Target.GPU:
        import pystencils.gpucuda as gpu
        compiled_kernel = gpu.make_python_function(gpu.create_cuda_kernel(eq))
    else:
        raise ValueError("Unknown target '%s'. Possible targets are `Target.CPU` and `Target.GPU`" % (target,))
    # Array-valued quantities are bound once, so the setter only needs the pdf array.
    kernel = functools.partial(compiled_kernel, **fixed_kernel_parameters)

    def setter(pdfs, **kwargs):
        """Run the compiled kernel on `pdfs`; extra keyword arguments are passed on to the kernel."""
        if pdf_arr is not None:
            assert pdfs.shape == pdf_arr.shape and pdfs.strides == pdf_arr.strides, \
                "Pdf array not matching blueprint which was used to compile" + str(pdfs.shape) + str(pdf_arr.shape)
        kernel(pdfs=pdfs, **kwargs)

    return setter