def random_symbol(assignment_list, dim, seed=TypedSymbol("seed", np.uint32), rng_node=PhiloxTwoDoubles,
                  time_step=TypedSymbol("time_step", np.uint32), offsets=None):
    """Yield an endless stream of symbols, each standing for one random number.

    A new ``rng_node`` is created whenever the result symbols of the previous
    one are used up. A node is inserted at the front of ``assignment_list``
    right before its first result symbol is handed out.

    Args:
        assignment_list: the subexpressions member of an AssignmentCollection;
            the generated RNG nodes are inserted here (at index 0)
        dim: 2 or 3 for two or three spatial dimensions
        seed: an integer or TypedSymbol(..., np.uint32) used as the second key
            of every node. Use different seeds for different generators,
            otherwise they produce the same stream of random numbers.
        rng_node: random number generator node class to instantiate
            (PhiloxTwoDoubles, PhiloxFourFloats, AESNITwoDoubles, AESNIFourFloats)
        time_step: TypedSymbol(..., np.uint32) holding the current time step
        offsets: tuple of offsets (constant integers or
            TypedSymbol(..., np.uint32)) giving the global coordinates of the
            local origin
    """
    # keys are (running counter, seed) followed by zeros up to the node's key count
    key_padding = (0, ) * (rng_node._num_keys - 2)
    counter = 0
    while True:
        node = rng_node(dim, keys=(counter, seed) + key_padding, time_step=time_step, offsets=offsets)
        for position, symbol in enumerate(node.result_symbols):
            if position == 0:
                # the node only enters the assignment list once one of its symbols is requested
                assignment_list.insert(0, node)
            yield symbol
        counter += 1
def __call__(self, field, direction_symbol, **kwargs):
    """Build the assignments that set ``field.center`` for the contact-angle boundary.

    Args:
        field: phase-field without index dimensions
        direction_symbol: symbol for the direction index, used to look up the
            offset to the neighbouring cell
        **kwargs: accepted but not used here

    Returns:
        list of assignments; a plain copy of the neighbour value when the
        contact angle is (numerically) 90 degrees.

    Raises:
        NotImplementedError: if ``field`` has index dimensions.
    """
    neighbor = BoundaryOffsetInfo.offset_from_dir(direction_symbol, field.spatial_dimensions)

    if field.index_dimensions != 0:
        raise NotImplementedError(
            "Contact angle only implemented for phase-fields which have a single "
            "value for each cell")

    if math.isclose(90, self._contact_angle, abs_tol=1e-5):
        return [Assignment(field.center, field[neighbor])]

    dtype = self._data_type
    dist = TypedSymbol("h", dtype)
    angle = TypedSymbol("a", dtype)
    squared_norm = TypedSymbol("tmp", dtype)

    result = [
        Assignment(squared_norm, sum([x * x for x in neighbor])),
        Assignment(dist, 0.5 * sp.sqrt(squared_norm)),
        Assignment(angle, math.cos(math.radians(self._contact_angle))),
    ]

    var = -dist * (4.0 / self._interface_width) * angle
    shifted = 1 + var
    neighbor_value = field[neighbor]
    else_branch = (shifted - sp.sqrt(shifted * shifted - 4 * var * neighbor_value)) / var - neighbor_value

    # for (almost) vanishing distance the neighbour value is copied unchanged
    update = sp.Piecewise((neighbor_value, dist < 0.001), (else_branch, True))
    result.append(Assignment(field.center, update))
    return result
def __init__(self, dim, time_step=TypedSymbol("time_step", np.uint32), offsets=None, keys=None):
    """Set up the node's arguments, result symbols and required header.

    Args:
        dim: number of spatial dimensions; one loop-counter coordinate is
            created per dimension
        time_step: symbol passed as first argument
        offsets: per-dimension offsets added to the loop counters
            (defaults to zeros)
        keys: key values appended after the coordinates (defaults to zeros)

    Raises:
        ValueError: if the number of keys or offsets does not match.
    """
    keys = (0, ) * self._num_keys if keys is None else keys
    offsets = (0, ) * dim if offsets is None else offsets

    if len(keys) != self._num_keys:
        raise ValueError(f"Provided {len(keys)} keys but need {self._num_keys}")
    if len(offsets) != dim:
        raise ValueError(f"Provided {len(offsets)} offsets but need {dim}")

    coordinates = []
    for axis in range(dim):
        coordinates.append(LoopOverCoordinate.get_loop_counter_symbol(axis) + offsets[axis])
    if dim < 3:
        coordinates.append(0)

    self._args = sp.sympify([time_step, *coordinates, *keys])
    # result symbol names carry the class-wide running id
    self.result_symbols = tuple(TypedSymbol(f'random_{self.id}_{i}', self._data_type)
                                for i in range(self._num_vars))

    symbols_read = set.union(*(arg.atoms(sp.Symbol) for arg in self.args))
    super().__init__("", symbols_read=symbols_read, symbols_defined=self.result_symbols)

    self.headers = [f'"{self._name.split("_")[0]}_rand.h"']

    RNGBase.id += 1
def test_dynamic_matrix_location_dependent():
    """Generate and print a kernel calling a coordinate-dependent matrix-valued function.

    Runs once with a ``std::function`` typed callable and once with a
    template-typed functor. Skipped if ``TypedMatrixSymbol`` cannot be imported.
    """
    try:
        from pystencils.data_types import TypedMatrixSymbol
    except ImportError:
        import pytest
        pytest.skip()

    x, y = pystencils.fields('x, y: float32[3d]')

    A = TypedMatrixSymbol('A', 3, 1, create_type('double'), CustomCppType('Vector3<double>'))

    def generate_and_show(function_dtype):
        # the callable is evaluated at the cell coordinates (x_vector)
        function_call = DynamicFunction(TypedSymbol('my_fun', function_dtype),
                                        A.dtype,
                                        *pystencils.x_vector(3))
        collection = pystencils.AssignmentCollection({
            A: function_call,
            y.center: A[0] + A[1] + A[2]
        })
        kernel_ast = pystencils.create_kernel(collection)
        pystencils.show_code(kernel_ast, custom_backend=FrameworkIntegrationPrinter())

    generate_and_show('std::function<Vector3<double>(int, int, int)>')
    generate_and_show(TemplateType('Functor_T'))
def type_symbol(term):
    """Return ``term`` as a typed object.

    Field accesses and typed symbols pass through unchanged. A plain sympy
    symbol becomes a ``TypedSymbol``: its type is looked up by name when
    ``type_info`` supports item access, otherwise ``type_info`` itself is
    used as the type for every symbol.

    Raises:
        ValueError: if ``term`` is neither a field access nor a symbol.
    """
    if isinstance(term, (Field.Access, TypedSymbol)):
        return term
    if not isinstance(term, sp.Symbol):
        raise ValueError("Term has to be field access or symbol")

    if hasattr(type_info, '__getitem__'):
        return TypedSymbol(term.name, type_info[term.name])
    return TypedSymbol(term.name, create_type(type_info))
def test_assumptions():
    """Check the sympy assumptions of field shape symbols and unsigned typed symbols."""
    x = ps.fields('x: float32[3d]')
    first_extent = x.shape[0]
    unsigned = TypedSymbol('a', create_type('uint64'))

    assert first_extent.is_nonnegative
    assert (2 * first_extent).is_nonnegative
    assert (2 * first_extent).is_integer
    assert unsigned.is_nonnegative
    # an unsigned value may be zero, so positivity is undetermined ...
    assert unsigned.is_positive is None
    # ... but adding one makes it positive
    assert (unsigned + 1).is_positive
    assert (first_extent + 1).is_real
def test_rng_offsets(kind, vectorized):
    """Run the Philox RNG test with non-zero offsets.

    Args:
        kind: ``'value'`` passes the offsets as integer constants,
            ``'symbol'`` passes them as typed symbols whose values are
            supplied through ``offset_values``.
        vectorized: if true, the vectorized test is run on the last detected
            instruction set; otherwise the scalar test is run on the CPU.
    """
    if vectorized:
        test = test_rng_vectorized
        if not instruction_sets:
            pytest.skip("cannot detect CPU instruction set")
        target = instruction_sets[-1]
    else:
        test = test_rng
        # Both kinds use the same target: this test is about how offsets are
        # passed, so the symbolic variant must not silently switch to the GPU.
        target = Target.CPU

    if kind == 'value':
        test(target, 'philox', 'float', 'float', t=8,
             offsets=(6, 7), keys=(5, 309))
    elif kind == 'symbol':
        offsets = (TypedSymbol("x0", np.uint32), TypedSymbol("y0", np.uint32))
        test(target, 'philox', 'float', 'float', t=8,
             offsets=offsets, offset_values=(6, 7), keys=(5, 309))
def visit_node(node, substitution_dict, default_type='double'):
    """Recursively type the expressions below ``node`` for vectorization.

    Works on a copy of ``substitution_dict``, so substitutions registered
    here are only seen by the children of ``node``. For assignments the
    right-hand side is substituted and processed; if that makes it a vector
    while the left-hand typed symbol is not, the symbol is replaced by a
    vector-typed one and the replacement is recorded for later statements.
    Conditions of conditionals are substituted and processed before
    descending; all other nodes are simply descended into.
    """
    def is_resolved_access(expr):
        return isinstance(expr, ast.ResolvedFieldAccess)

    substitution_dict = substitution_dict.copy()
    for child in node.args:
        if not isinstance(child, ast.SympyAssignment):
            if isinstance(child, ast.Conditional):
                child.condition_expr = fast_subs(child.condition_expr, substitution_dict,
                                                 skip=is_resolved_access)
                child.condition_expr = visit_expr(child.condition_expr, default_type)
            visit_node(child, substitution_dict, default_type)
            continue

        substituted_rhs = fast_subs(child.rhs, substitution_dict, skip=is_resolved_access)
        child.rhs = visit_expr(substituted_rhs, default_type)
        rhs_type = get_type_of_expression(child.rhs)

        if isinstance(child.lhs, TypedSymbol):
            lhs_type = child.lhs.dtype
            if type(rhs_type) is VectorType and type(lhs_type) is not VectorType:
                # promote the scalar lhs symbol to a vector of the rhs width
                promoted = TypedSymbol(child.lhs.name, VectorType(lhs_type, rhs_type.width))
                substitution_dict[child.lhs] = promoted
                child.lhs = promoted
        elif isinstance(child.lhs, vector_memory_access):
            child.lhs = visit_expr(child.lhs, default_type)
def get_coordinate_symbol_assignment(name):
    """Create the assignment that loads coordinate ``name`` from an index field.

    The index fields are searched in order; the first one whose struct type
    has an element called ``name`` provides the value, which is assigned to
    an ``np.int64`` typed symbol of the same name.

    Raises:
        ValueError: if no index field has an element called ``name``.
    """
    for ind_f in index_fields:
        struct_type = ind_f.dtype
        assert isinstance(struct_type, StructType), "Index fields have to have a struct data type"
        if not struct_type.has_element(name):
            continue
        rhs = ind_f[0](name)
        lhs = TypedSymbol(name, np.int64)
        return SympyAssignment(lhs, rhs)
    raise ValueError(f"Index {name} not found in any of the passed index fields")
def __new__(cls, mesh_name, base_dtype, *args):
    """Create a dynamic function named after ``mesh_name`` that returns a 3x1 matrix type.

    The created object additionally stores ``mesh_name`` as attribute.
    """
    from pystencils.data_types import TypedMatrixSymbol

    # only the dtype of this helper symbol is used (as return type of the function)
    vector_symbol = TypedMatrixSymbol('A', 3, 1, base_dtype, 'Vector3<real_t>')
    function_symbol = TypedSymbol(str(mesh_name),
                                  'std::function<Vector3<real_t>(int, int, int)>')

    instance = DynamicFunction.__new__(cls, function_symbol, vector_symbol.dtype, *args)
    instance.mesh_name = mesh_name
    return instance
def test_advanced_streaming_noslip_single_cell(stencil, streaming_pattern, prev_timestep):
    """NoSlip on a single cell: each direction must read back the index of its inverse.

    The value ``d`` is written to the outgoing PDF of direction ``d`` in the
    centre cell, the NoSlip boundary kernel is applied for all directions,
    and the incoming PDFs of the next time step are compared against the
    inverse direction indices.
    """
    stencil = LBStencil(stencil)
    q, dim = stencil.Q, stencil.D
    pdf_field = ps.fields(f'pdfs({q}): [{dim}D]')

    writer = AccessPdfValues(stencil, streaming_pattern, prev_timestep, 'out')
    reader = AccessPdfValues(stencil, streaming_pattern, prev_timestep.next(), 'in')

    pdfs = np.zeros((3, ) * dim + (q, ))
    pos = (1, ) * dim
    for direction in range(q):
        writer.write_pdf(pdfs, pos, direction, direction)

    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT)
    lb_method = create_lb_method(lbm_config=lbm_config)
    noslip = NoSlip()

    index_struct_dtype = numpy_data_type_for_boundary_object(noslip, dim)
    index_vector_size = TypedSymbol("indexVectorSize", create_type(np.int64))
    index_field = Field('indexVector', FieldType.INDEXED, index_struct_dtype, layout=[0],
                        shape=(index_vector_size, 1), strides=(1, 1))
    # one index entry per direction, all located at the centre cell
    index_vector = np.array([pos + (direction, ) for direction in range(q)],
                            dtype=index_struct_dtype)

    ast = create_lattice_boltzmann_boundary_kernel(pdf_field, index_field, lb_method, noslip,
                                                   prev_timestep=prev_timestep,
                                                   streaming_pattern=streaming_pattern)
    flex_kernel = ast.compile()
    flex_kernel(pdfs=pdfs, indexVector=index_vector, indexVectorSize=len(index_vector))

    reflected_pdfs = [reader.read_pdf(pdfs, pos, direction) for direction in range(q)]
    inverse_pdfs = [inverse_dir_index(stencil, direction) for direction in range(q)]
    assert reflected_pdfs == inverse_pdfs
def test_Basic_data_type():
    """Exercise typed_symbols, matrix_symbols, type_all_numbers and basic dtype queries."""
    # tuple of names and comma separated string are interchangeable
    assert typed_symbols(("s", "f"), np.uint) == typed_symbols("s, f", np.uint)

    t_symbols = typed_symbols(("s", "f"), np.uint)
    first = t_symbols[0]
    assert t_symbols[0] == TypedSymbol("s", np.uint)
    assert first.dtype.is_uint()
    assert first.dtype.is_complex() == 0

    for other_type in (str, bool, np.void):
        assert typed_symbols("s", other_type).dtype.is_other()

    assert typed_symbols("s", np.float64).dtype.base_name == 'double'
    # removed for old sympy version
    # assert typed_symbols(("s"), np.float64).dtype.sympy_dtype == typed_symbols(("s"), float).dtype.sympy_dtype

    f, g = ps.fields("f, g : double[2D]")
    assignment = ps.Assignment(f.center(), 2 * g.center() + 5)
    typed_text = str(type_all_numbers(assignment, np.float64))
    assert "cast_func(2, double)" in typed_text
    assert "cast_func(5, double)" in typed_text

    matrices = matrix_symbols("a, b", np.uint, 3, 3)
    assert len(matrices) == 2
    for i, elem in enumerate(matrices[0]):
        assert elem == TypedSymbol(f"a{i}", np.uint)
        assert elem.dtype.is_uint()

    reference = TypedSymbol("s", np.uint)
    assert TypedSymbol("s", np.uint).canonical == reference
    assert TypedSymbol("s", np.uint).reversed == reference
def get_coordinate_symbol_assignment(name):
    """Create the assignment that loads coordinate ``name`` from an index field.

    The first index field whose struct type has an element ``name`` is used.
    The assigned symbol gets the element's own type, wrapped in ``BasicType``.

    Raises:
        ValueError: if no index field has an element called ``name``.
    """
    for idx_field in index_fields:
        struct_type = idx_field.dtype
        assert isinstance(struct_type, StructType), "Index fields have to have a struct data type"
        if not struct_type.has_element(name):
            continue
        rhs = idx_field[0](name)
        element_type = BasicType(struct_type.get_element_type(name))
        return SympyAssignment(TypedSymbol(name, element_type), rhs)
    raise ValueError("Index %s not found in any of the passed index fields" % (name, ))
def add_neumann_boundary(eqs, fields, flag_field, boundary_flag="neumann_flag", inverse_flag=False):
    """Replace all neighbor accesses by flag-field guarded accesses.

    If the neighboring cell is marked as boundary, the center value is used
    instead of the neighbor value.

    Args:
        eqs: iterable of equations containing field accesses to direct neighbors
        fields: field or collection of fields for which the Neumann boundary
            should be applied
        flag_field: integer field marking boundary cells
        boundary_flag: bit mask (or name of the symbol holding it); a cell
            counts as boundary if the bitwise AND of its flag value and this
            mask is non-zero
        inverse_flag: if true the test is inverted, i.e. boundary cells are
            those where the bitwise AND is zero

    Returns:
        list of equations with guarded field accesses

    Raises:
        ValueError: if a guarded field is accessed with an offset outside
            of -1, 0, 1.
    """
    # eqs is traversed twice (collecting substitutions, then applying them),
    # so a one-shot iterable has to be materialized first
    eqs = list(eqs)

    if not hasattr(fields, "__len__"):
        fields = [fields]
    fields = set(fields)

    if isinstance(boundary_flag, str):
        boundary_flag = TypedSymbol(boundary_flag, dtype=create_type(DEFAULT_FLAG_TYPE))

    substitutions = {}
    for eq in eqs:
        for fa in eq.atoms(Field.Access):
            if fa.field not in fields:
                continue
            if not all(offset in (-1, 0, 1) for offset in fa.offsets):
                raise ValueError("Works only for single neighborhood stencils")
            if all(offset == 0 for offset in fa.offsets):
                # center accesses never need a guard
                continue

            masked_flag = bitwise_and(flag_field[tuple(fa.offsets)], boundary_flag)
            if inverse_flag:
                condition = sp.Eq(masked_flag, 0)
            else:
                condition = sp.Ne(masked_flag, 0)

            center = fa.field(*fa.index)
            substitutions[fa] = sp.Piecewise((center, condition), (fa, True))
    return [eq.subs(substitutions) for eq in eqs]
class BoundaryOffsetInfo(CustomCodeNode):
    """Code node defining the per-direction offset arrays and the inverse-direction array.

    The generated code declares one constant array per spatial dimension
    (``cx``, ``cy``, ``cz``) holding that component of every stencil
    direction, and one array ``invdir`` mapping each direction index to the
    index of the opposite direction.
    """

    # symbol of the array that maps a direction index to its inverse direction index
    INV_DIR_SYMBOL = TypedSymbol("invdir", "int")

    # --------------------------- Functions to be used by boundaries --------------------------

    @staticmethod
    def offset_from_dir(dir_idx, dim):
        """Return a tuple with one indexed offset-array access per dimension for ``dir_idx``."""
        accesses = []
        for symbol in BoundaryOffsetInfo._offset_symbols(dim):
            accesses.append(sp.IndexedBase(symbol, shape=(1, ))[dir_idx])
        return tuple(accesses)

    @staticmethod
    def inv_dir(dir_idx):
        """Return the indexed access into the inverse-direction array for ``dir_idx``."""
        inverse_table = sp.IndexedBase(BoundaryOffsetInfo.INV_DIR_SYMBOL, shape=(1, ))
        return inverse_table[dir_idx]

    # ---------------------------------- Internal ---------------------------------------------

    def __init__(self, stencil):
        """Generate the array definitions for ``stencil`` (a sequence of direction tuples)."""
        dim = len(stencil[0])
        offset_symbols = BoundaryOffsetInfo._offset_symbols(dim)

        pieces = ["\n"]
        for i in range(dim):
            components = ", ".join(str(direction[i]) for direction in stencil)
            pieces.append("const int64_t %s [] = { %s };\n" % (offset_symbols[i].name, components))

        # position of the negated direction within the stencil, for every direction
        inverse_indices = [str(stencil.index(tuple(-c for c in direction)))
                           for direction in stencil]
        pieces.append("const int %s [] = { %s };\n" % (self.INV_DIR_SYMBOL.name,
                                                       ", ".join(inverse_indices)))

        super(BoundaryOffsetInfo, self).__init__("".join(pieces), symbols_read=set(),
                                                 symbols_defined=set(offset_symbols + [self.INV_DIR_SYMBOL]))

    @staticmethod
    def _offset_symbols(dim):
        """Return the int64 typed symbols ``cx``, ``cy``, ``cz`` for the first ``dim`` axes."""
        axis_names = ['x', 'y', 'z'][:dim]
        return [TypedSymbol("c%s" % (axis, ), create_type(np.int64)) for axis in axis_names]
def test_dynamic_function():
    """Generate and print kernels that add the result of a dynamic function to a field.

    First with a ``std::function`` typed callable taking one symbol, then
    with a template-typed functor taking a symbol and a field value.
    """
    x, y = pystencils.fields('x, y: float32[3d]')
    a = sp.symbols('a')

    def generate_and_show(function_call):
        collection = pystencils.AssignmentCollection({y.center: x.center + function_call})
        kernel_ast = pystencils.create_kernel(collection)
        pystencils.show_code(kernel_ast, custom_backend=FrameworkIntegrationPrinter())

    std_function_symbol = TypedSymbol('my_fun', 'std::function<double(double)>')
    generate_and_show(DynamicFunction(std_function_symbol, create_type('double'), a))

    template_symbol = TypedSymbol('my_fun', TemplateType('Functor_T'))
    generate_and_show(DynamicFunction(template_symbol, create_type('double'), a, x.center))
def create_boundary_kernel(field, index_field, stencil, boundary_functor, target=Target.CPU, **kernel_creation_args):
    """Create a kernel that applies ``boundary_functor`` at the cells listed in ``index_field``.

    The kernel body consists of the stencil's offset information, the
    assignment of the ``dir`` entry of the index field to an int64 symbol,
    and whatever the boundary functor returns for that direction symbol.

    Args:
        field: field the boundary acts on
        index_field: indexed field with a ``dir`` element
        stencil: stencil passed to ``BoundaryOffsetInfo``
        boundary_functor: callable returning the boundary assignments
        target: target passed on to the kernel configuration
        **kernel_creation_args: further arguments for ``CreateKernelConfig``
    """
    offset_info = BoundaryOffsetInfo(stencil)
    dir_symbol = TypedSymbol("dir", np.int64)

    elements = [offset_info, Assignment(dir_symbol, index_field[0]('dir'))]
    elements.extend(boundary_functor(field, direction_symbol=dir_symbol, index_field=index_field))

    config = CreateKernelConfig(index_fields=[index_field], target=target, **kernel_creation_args)
    return create_kernel(elements, config=config)
def create_boundary_kernel(field, index_field, stencil, boundary_functor, target='cpu', openmp=True):
    """Create an indexed kernel that applies ``boundary_functor`` at the cells of ``index_field``.

    The direction symbol gets the type of the ``dir`` entry of the index
    field's numpy struct dtype.

    Args:
        field: field the boundary acts on
        index_field: indexed field with a ``dir`` element
        stencil: stencil passed to ``BoundaryOffsetInfo``
        boundary_functor: callable returning the boundary assignments
        target: target passed on to ``create_indexed_kernel``
        openmp: passed on as ``cpu_openmp``
    """
    offset_info = BoundaryOffsetInfo(stencil)
    # numpy stores (dtype, offset) per struct field; the dtype is the first entry
    dir_dtype = index_field.dtype.numpy_dtype.fields['dir'][0]
    dir_symbol = TypedSymbol("dir", dir_dtype)

    elements = [offset_info, Assignment(dir_symbol, index_field[0]('dir'))]
    elements.extend(boundary_functor(field, direction_symbol=dir_symbol, index_field=index_field))
    return create_indexed_kernel(elements, [index_field], target=target, cpu_openmp=openmp)
def undefined_symbols(self) -> Set[sp.Symbol]:
    """Return the symbols this node needs from outside.

    For every field whose symbols are defined by this node, one reference
    typed symbol (class name from ``CLASS_NAME_TEMPLATE`` plus ``&``) is
    required. In addition, all undefined symbols of the body that this node
    does not define itself are passed through.
    """
    fields_by_name = {f.name: f for f in self.fields_accessed}
    defined_here = self.symbols_defined

    # names of the fields that the defined symbols belong to
    field_names = set()
    for symbol in defined_here:
        if hasattr(symbol, 'field_name'):
            field_names.add(symbol.field_name)
        if hasattr(symbol, 'field_names'):
            field_names.add(symbol.field_names[0])

    wrapper_symbols = set()
    for name in field_names:
        accessed = fields_by_name[name]
        class_name = self.CLASS_NAME_TEMPLATE.format(dtype=accessed.dtype, ndim=accessed.ndim)
        wrapper_symbols.add(TypedSymbol(name, class_name + '&'))

    return wrapper_symbols | (self.body.undefined_symbols - defined_here)
def test_global_definitions_with_global_symbol():
    """A symbol required by a node with ``requires_global=True`` must not become a kernel parameter."""
    # Teach our printer to print new ast nodes
    def print_usage(_, __):
        return "// Bogus would go here"

    def print_declaration(_, __):
        return "// Declaration would go here"

    CBackend._print_BogusUsage = print_usage
    CBackend._print_BogusDeclaration = print_declaration

    z, x, y = pystencils.fields("z, y, x: [2d]")

    update = pystencils.Assignment(z[0, 0], x[0, 0] * sympy.log(x[0, 0] * y[0, 0]))
    normal_assignments = pystencils.AssignmentCollection([update], [])

    ast = pystencils.create_kernel(normal_assignments)
    print(pystencils.show_code(ast))

    ast.body.append(BogusUsage(requires_global=True))
    print(pystencils.show_code(ast))

    kernel = ast.compile()
    assert kernel is not None

    parameter_symbols = [p.symbol for p in ast.get_parameters()]
    assert TypedSymbol('Foo', 'double') not in parameter_symbols
def undefined_symbols(self) -> Set[sp.Symbol]:
    """Return the symbols this node needs from outside.

    For every field whose symbols are defined by this node, one typed symbol
    with the class name from ``CLASS_NAME_TEMPLATE`` is required (as
    reference if ``ARGS_AS_REFERENCE`` is set). A field name that is not
    among the accessed fields is looked up again with a ``diff`` prefix.
    In addition, all undefined symbols of the body that this node does not
    define itself are passed through.
    """
    fields_by_name = {f.name: f for f in self.fields_accessed}
    defined_here = self.symbols_defined

    # names of the fields that the defined symbols belong to
    field_names = set()
    for symbol in defined_here:
        if hasattr(symbol, 'field_name'):
            field_names.add(symbol.field_name)
        if hasattr(symbol, 'field_names'):
            field_names.add(symbol.field_names[0])

    wrapper_symbols = set()
    for name in field_names:
        accessed = fields_by_name.get(name) or fields_by_name.get('diff' + name)
        class_name = self.CLASS_NAME_TEMPLATE.format(dtype=accessed.dtype, ndim=accessed.ndim)
        suffix = '&' if self.ARGS_AS_REFERENCE else ''
        wrapper_symbols.add(TypedSymbol(name, class_name + suffix))

    return wrapper_symbols | (self.body.undefined_symbols - defined_here)
def test_dynamic_matrix():
    """Generate and print a kernel that sums the entries of a matrix-valued dynamic function.

    Skipped if ``TypedMatrixSymbol`` cannot be imported.
    """
    x, y = pystencils.fields('x, y: float32[3d]')
    try:
        from pystencils.data_types import TypedMatrixSymbol
    except ImportError:
        import pytest
        pytest.skip()

    a = sp.symbols('a')
    A = TypedMatrixSymbol('A', 3, 1, create_type('double'), 'Vector3<double>')

    function_symbol = TypedSymbol('my_fun', 'std::function<Vector3<double>(double)>')
    function_call = DynamicFunction(function_symbol, A.dtype, a)

    collection = pystencils.AssignmentCollection({
        A: function_call,
        y.center: A[0] + A[1] + A[2]
    })
    kernel_ast = pystencils.create_kernel(collection)
    pystencils.show_code(kernel_ast, custom_backend=FrameworkIntegrationPrinter())
def vectorize_inner_loops_and_adapt_load_stores(ast_node, vector_width, assume_aligned, nontemporal_fields,
                                                strided, keep_loop_stop, assume_sufficient_line_padding):
    """Goes over all innermost loops, changes increment to vector width and replaces field accesses by vector type.

    Args:
        ast_node: kernel AST that is modified in place; its ``instruction_set``
            must provide the entry ``'intwidth'``
        vector_width: new step of the vectorized loops and width of the vector types
        assume_aligned: if true, accesses whose offset vanishes when all loop
            counters are zero are marked as aligned
        nontemporal_fields: collection of fields or field names whose accesses
            are marked nontemporal
        strided: if true, accesses where the loop counter is not a plain offset
            are allowed as long as their stride does not depend on the loop counter
        keep_loop_stop: if true, the loop bounds are left untouched
        assume_sufficient_line_padding: together with ``assume_aligned`` the
            loop stop is rounded up to a multiple of the vector width instead
            of cutting off a tail loop
    """
    # Materialize the loops: they are traversed twice below, and a one-shot
    # iterator would be exhausted after the first pass, leaving
    # zero_loop_counters empty.
    all_loops = list(filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment))
    inner_loops = [loop for loop in all_loops if loop.is_innermost_loop]
    zero_loop_counters = {loop.loop_counter_symbol: 0 for loop in all_loops}

    for loop_node in inner_loops:
        loop_range = loop_node.stop - loop_node.start

        # cut off loop tail, that is not a multiple of the vector width
        if keep_loop_stop:
            pass
        elif assume_aligned and assume_sufficient_line_padding:
            new_stop = loop_node.start + modulo_ceil(loop_range, vector_width)
            loop_node.stop = new_stop
        else:
            cutting_point = modulo_floor(loop_range, vector_width) + loop_node.start
            loop_nodes = [n for n in cut_loop(loop_node, [cutting_point]).args
                          if isinstance(n, ast.LoopOverCoordinate)]
            assert len(loop_nodes) in (0, 1, 2)  # 2 for main and tail loop, 1 if loop range divisible by vector width
            if len(loop_nodes) == 0:
                continue
            loop_node = loop_nodes[0]

        # Find all array accesses (indexed) that depend on the loop counter as offset
        loop_counter_symbol = ast.LoopOverCoordinate.get_loop_counter_symbol(loop_node.coordinate_to_loop_over)
        substitutions = {}
        successful = True
        for indexed in loop_node.atoms(sp.Indexed):
            base, index = indexed.args
            if loop_counter_symbol in index.atoms(sp.Symbol):
                loop_counter_is_offset = loop_counter_symbol not in (index - loop_counter_symbol).atoms()
                aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) == 0
                # change of the index when the loop counter advances by one
                stride = sp.simplify(index.subs({loop_counter_symbol: loop_counter_symbol + 1}) - index)
                if not loop_counter_is_offset and (not strided or loop_counter_symbol in stride.atoms()):
                    successful = False
                    break
                typed_symbol = base.label
                assert type(typed_symbol.dtype) is PointerType, \
                    f"Type of access is {typed_symbol.dtype}, {indexed}"

                vec_type = VectorType(typed_symbol.dtype.base_type, vector_width)
                use_aligned_access = aligned_access and assume_aligned
                nontemporal = False
                if hasattr(indexed, 'field'):
                    nontemporal = (indexed.field in nontemporal_fields) or (indexed.field.name in nontemporal_fields)
                substitutions[indexed] = vector_memory_access(indexed, vec_type, use_aligned_access, nontemporal, True,
                                                              stride if strided else 1)
                if nontemporal:
                    # insert NontemporalFence after the outermost loop
                    parent = loop_node.parent
                    while type(parent.parent.parent) is not ast.KernelFunction:
                        parent = parent.parent
                    parent.parent.insert_after(NontemporalFence(), parent, if_not_exists=True)
                    # insert CachelineSize at the beginning of the kernel
                    parent.parent.insert_front(CachelineSize(), if_not_exists=True)
        if not successful:
            warnings.warn("Could not vectorize loop because of non-consecutive memory access")
            continue

        loop_node.step = vector_width
        loop_node.subs(substitutions)

        # vectorized loop counter: broadcast counter plus the lane indices
        vector_int_width = ast_node.instruction_set['intwidth']
        vector_loop_counter = cast_func(loop_counter_symbol, VectorType(loop_counter_symbol.dtype, vector_int_width)) \
            + cast_func(tuple(range(vector_int_width if type(vector_int_width) is int else 2)),
                        VectorType(loop_counter_symbol.dtype, vector_int_width))

        fast_subs(loop_node, {loop_counter_symbol: vector_loop_counter},
                  skip=lambda e: isinstance(e, ast.ResolvedFieldAccess) or isinstance(e, vector_memory_access))

        mask_conditionals(loop_node)

        # give the result symbols of random number generator nodes a vector type
        from pystencils.rng import RNGBase
        substitutions = {}
        for rng in loop_node.atoms(RNGBase):
            new_result_symbols = [TypedSymbol(s.name, VectorType(s.dtype, width=vector_width))
                                  for s in rng.result_symbols]
            substitutions.update(dict(zip(rng.result_symbols, new_result_symbols)))
            rng._symbols_defined = set(new_result_symbols)
        fast_subs(loop_node, substitutions, skip=lambda e: isinstance(e, RNGBase))
def undefined_symbols(self):
    """Return the symbols used by this node but not defined inside it: the double symbol ``Foo``."""
    foo = TypedSymbol('Foo', 'double')
    return {foo}
def generate_boundary(generation_context, class_name, boundary_object, lb_method, **create_kernel_params):
    """Generate header and source file of a boundary class for ``boundary_object``.

    Args:
        generation_context: provides accuracy and OpenMP settings and writes
            the generated files
        class_name: name of the generated class; also assigned to
            ``boundary_object.name`` and used as file base name
        boundary_object: boundary for which the kernel is created
        lb_method: lattice Boltzmann method providing dimension and stencil
        **create_kernel_params: kernel creation parameters, completed with
            defaults from the generation context
    """
    struct_name = "IndexInfo"
    boundary_object.name = class_name

    create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params)
    target = create_kernel_params['target']

    dim = lb_method.dim
    index_struct_dtype = numpy_data_type_for_boundary_object(boundary_object, dim)

    pdf_dtype = np.float64 if generation_context.double_accuracy else np.float32
    pdf_field = Field.create_generic('pdfs', dim, pdf_dtype,
                                     index_dimensions=1, layout='fzyx',
                                     index_shape=[len(lb_method.stencil)])

    index_vector_size = TypedSymbol("indexVectorSize", create_type(np.int64))
    index_field = Field('indexVector', FieldType.INDEXED, index_struct_dtype, layout=[0],
                        shape=(index_vector_size, 1), strides=(1, 1))

    kernel = create_lattice_boltzmann_boundary_kernel(pdf_field, index_field, lb_method, boundary_object,
                                                      target=target, openmp=generation_context.openmp)
    kernel.function_name = "boundary_" + boundary_object.name

    # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D
    if dim == 2:
        stencil = tuple(direction + (0, ) for direction in lb_method.stencil)
    else:
        stencil = lb_method.stencil

    stencil_info = []
    for i, direction in enumerate(stencil):
        stencil_info.append((i, ", ".join(str(component) for component in direction)))

    context = {
        'class_name': boundary_object.name,
        'StructName': struct_name,
        'StructDeclaration': struct_from_numpy_dtype(struct_name, index_struct_dtype),
        'kernel': KernelInfo(kernel),
        'stencil_info': stencil_info,
        'dim': dim,
        'target': target,
        'namespace': 'lbm',
    }

    env = Environment(loader=PackageLoader('lbmpy_walberla'), undefined=StrictUndefined)
    add_pystencils_filters_to_jinja_env(env)

    header = env.get_template('Boundary.tmpl.h').render(**context)
    source = env.get_template('Boundary.tmpl.cpp').render(**context)

    # CPU kernels go into a .cpp file, everything else into a .cu file
    if create_kernel_params.get("target", "cpu") == "cpu":
        source_extension = "cpp"
    else:
        source_extension = "cu"

    generation_context.write_file("{}.h".format(class_name), header)
    generation_context.write_file("{}.{}".format(class_name, source_extension), source)
projection_matrices = pystencils.fields('matrices: float32[3d]') inv_matrices = pystencils.fields('inv_matrices: float32[3d]') source_points = pystencils.fields('source_points: float32[1d]') volume_slice = pystencils.fields('volume_slice: float32[2d]') projections_1d = pystencils.fields('projections_1d: float32[2d]') ray_vectors = pystencils.fields('ray_vectors: float32[2d]') FUNCTIONS = { 'Cone_Backprojection3D_Kernel_Launcher': CustomFunctionCall('Cone_Backprojection3D_Kernel_Launcher', FieldPointerSymbol(projection.name, projection.dtype, const=True), FieldPointerSymbol(volume.name, volume.dtype, const=False), FieldPointerSymbol(projection_matrices.name, projection_matrices.dtype, const=True), FieldShapeSymbol(['matrices'], 0), *[FieldShapeSymbol(['volume'], i) for i in range(2, -1, -1)], TypedSymbol('volume_spacing_x', create_type('float32'), const=True), TypedSymbol('volume_spacing_y', create_type('float32'), const=True), TypedSymbol('volume_spacing_z', create_type('float32'), const=True), TypedSymbol('volume_origin_x', create_type('float32'), const=True), TypedSymbol('volume_origin_y', create_type('float32'), const=True), TypedSymbol('volume_origin_z', create_type('float32'), const=True), *[FieldShapeSymbol(['projection'], i) for i in range(2, 0, -1)], TypedSymbol('projection_multiplier', create_type('float32'), const=True), backend='gpucuda', fields_accessed=[volume, projection, projection_matrices], custom_signature=""" void Cone_Backprojection3D_Kernel_Launcher(const float *sinogram_ptr, float *out, const float *projection_matrices, const int number_of_projections, const int volume_width, const int volume_height, const int volume_depth, const float volume_spacing_x, const float volume_spacing_y, const float volume_spacing_z, const float volume_origin_x, const float volume_origin_y, const float volume_origin_z, const int detector_width, const int detector_height, const float projection_multiplier); """), # noqa
def test_typed_symbol():
    """A typed symbol can be copied, and a deep copy keeps its ``double`` dtype."""
    original = TypedSymbol("s", "double")
    # a shallow copy only has to work without raising
    copy(original)
    duplicate = deepcopy(original)
    dtype_text = str(duplicate.dtype).strip()
    assert dtype_text == "double"
def symbols_defined(self):
    """Return the set of symbols defined by this node: the double symbol ``Foo``."""
    foo = TypedSymbol('Foo', 'double')
    return {foo}
import pystencils as ps from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule from lbmpy.fieldaccess import AAEvenTimeStepAccessor, AAOddTimeStepAccessor from pystencils_walberla import generate_pack_info_from_kernel from pystencils_walberla import CodeGeneration, generate_sweep from pystencils.data_types import TypedSymbol from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter omega = sp.symbols("omega") compile_time_block_size = False if compile_time_block_size: sweep_block_size = (128, 1, 1) else: sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32), TypedSymbol("cudaBlockSize1", np.int32), 1) sweep_params = {'block_size': sweep_block_size} options_dict = { 'srt': { 'method': 'srt', 'stencil': 'D3Q19', 'relaxation_rate': omega, 'compressible': False, }, 'trt': { 'method': 'trt', 'stencil': 'D3Q19', 'relaxation_rate': omega,
def type_expr(eq, dtype):
    """Return ``eq`` with all numbers typed and every symbol replaced by a typed symbol of ``dtype``."""
    typed_numbers = type_all_numbers(eq, dtype=dtype)
    replacements = {}
    for symbol in typed_numbers.atoms(sp.Symbol):
        replacements[symbol] = TypedSymbol(symbol.name, dtype)
    return typed_numbers.subs(replacements)