def test_flag_condition():
    """Compare a compiled ``flag_cond`` kernel with a hand-written reference.

    Component 0 of ``f`` is assigned ``flag_cond(3, mask, v1)`` (no explicit
    else-value) and component 1 ``flag_cond(5, mask, v2, v3)``.  The 2x2 mask
    has bit 3 and/or bit 5 set in three of its four cells.
    """
    value_bit3 = 42.3
    value_bit5 = 39.7
    value_no_bit5 = 119.87

    f_arr = np.zeros((2, 2, 2), dtype=np.float64)
    mask_arr = np.zeros((2, 2), dtype=np.uint64)
    mask_arr[0, 1] = 1 << 3
    mask_arr[1, 0] = 1 << 5
    mask_arr[1, 1] = (1 << 3) + (1 << 5)

    f = Field.create_from_numpy_array('f', f_arr, index_dimensions=1)
    mask = Field.create_from_numpy_array('mask', mask_arr)

    update = [
        Assignment(f(0), flag_cond(3, mask(0), value_bit3)),
        Assignment(f(1), flag_cond(5, mask(0), value_bit5, value_no_bit5)),
    ]
    compiled = create_kernel(update).compile()
    compiled(f=f_arr, mask=mask_arr)

    expected = np.zeros((2, 2, 2), dtype=np.float64)
    # Component 0: bit 3 is set exactly in mask column 1; other cells stay 0.
    expected[:, 1, 0] = value_bit3
    # Component 1: bit 5 is set exactly in mask row 1; row 0 gets the else-value.
    expected[0, :, 1] = value_no_bit5
    expected[1, :, 1] = value_bit5

    np.testing.assert_array_equal(f_arr, expected)
def test_indexed_cuda_kernel():
    """Run an indexed CUDA kernel that scatters values from an index list.

    Each record of the structured index array carries a cell coordinate
    (``x``, ``y``) and a ``value``; after the kernel ran, the target array must
    hold that value at that coordinate.  When pycuda cannot be imported the
    test only prints a notice.
    """
    try:
        import pycuda
    except ImportError:
        pycuda = None

    if not pycuda:
        print("Did not run test on GPU since no pycuda is available")
        return

    from pystencils.gpucuda import make_python_function
    import pycuda.gpuarray as gpuarray
    from pystencils.gpucuda.kernelcreation import created_indexed_cuda_kernel

    arr = np.zeros((3, 4))
    record_dtype = np.dtype([('x', int), ('y', int), ('value', arr.dtype)])
    index_arr = np.zeros((3, ), dtype=record_dtype)
    for row, record in enumerate([(0, 2, 3.0), (1, 3, 42.0), (2, 1, 5.0)]):
        index_arr[row] = record

    indexed_field = Field.create_from_numpy_array('index', index_arr)
    normal_field = Field.create_from_numpy_array('f', arr)
    update_rule = Assignment(normal_field[0, 0], indexed_field('value'))

    ast = created_indexed_cuda_kernel([update_rule], [indexed_field])
    kernel = make_python_function(ast)

    gpu_arr = gpuarray.to_gpu(arr)
    gpu_index_arr = gpuarray.to_gpu(index_arr)
    kernel(f=gpu_arr, index=gpu_index_arr)
    # Copy the device result back into the host array.
    gpu_arr.get(arr)

    for record in index_arr:
        np.testing.assert_allclose(arr[record['x'], record['y']], record['value'], atol=1e-13)
def test_sliced_iteration_llvm():
    """Run an LLVM kernel restricted to an iteration slice with a symbolic end.

    The slice is ``[1:x_end, 1]``; ``x_end`` is passed at call time.  With an
    all-ones source and ``a = b = 1`` the four-neighbour average is 1, so only
    the cells inside the slice may become 1.
    """
    size = (4, 4)
    src_arr = np.ones(size)
    dst_arr = np.zeros_like(src_arr)
    src_field = Field.create_from_numpy_array('src', src_arr)
    dst_field = Field.create_from_numpy_array('dst', dst_arr)

    a, b = sp.symbols("a b")
    weighted_neighbours = (a * src_field[0, 1] + a * src_field[0, -1] +
                           b * src_field[1, 0] + b * src_field[-1, 0])
    update_rule = Assignment(dst_field[0, 0], weighted_neighbours / 4)

    x_end = TypedSymbol("x_end", "int")
    iteration_slice = make_slice[1:x_end, 1]
    x_end_value = size[1] - 1

    import pystencils.llvm as llvm_generator
    ast = llvm_generator.create_kernel(sympy_cse_on_assignment_list([update_rule]),
                                       iteration_slice=iteration_slice)
    kernel = llvm_generator.make_python_function(ast)
    kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)

    expected_result = np.zeros(size)
    expected_result[1:x_end_value, 1] = 1
    np.testing.assert_almost_equal(expected_result, dst_arr)
def test_full_scalar_field():
    """Pack a whole scalar field into a GPU buffer and unpack it again.

    After the round trip the destination array fetched from the device must
    equal the host source array.
    """
    for src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr in _generate_fields():
        src_field = Field.create_from_numpy_array("src_field", src_arr)
        dst_field = Field.create_from_numpy_array("dst_field", src_arr)
        buffer = Field.create_generic("buffer", spatial_dimensions=1,
                                      field_type=FieldType.BUFFER, dtype=src_arr.dtype)

        # Pack: field -> buffer.
        pack_code = create_cuda_kernel(
            [Assignment(buffer.center(), src_field.center())],
            type_info={'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype},
        )
        make_python_function(pack_code)(buffer=gpu_buffer_arr, src_field=gpu_src_arr)

        # Unpack: buffer -> field.
        unpack_code = create_cuda_kernel(
            [Assignment(dst_field.center(), buffer.center())],
            type_info={'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype},
        )
        make_python_function(unpack_code)(dst_field=gpu_dst_arr, buffer=gpu_buffer_arr)

        np.testing.assert_equal(src_arr, gpu_dst_arr.get())
def test_full_scalar_field():
    """Pack a whole scalar field into a buffer and unpack it again.

    After the round trip the destination array must equal the source array.
    """
    for src_arr, dst_arr, buffer_arr in _generate_fields():
        src_field = Field.create_from_numpy_array("src_field", src_arr)
        dst_field = Field.create_from_numpy_array("dst_field", dst_arr)
        buffer = Field.create_generic("buffer", spatial_dimensions=1,
                                      field_type=FieldType.BUFFER, dtype=src_arr.dtype)

        # Pack: field -> buffer.
        pack_code = create_kernel(
            [Assignment(buffer.center(), src_field.center())],
            data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype},
        )
        pack_code.compile()(buffer=buffer_arr, src_field=src_arr)

        # Unpack: buffer -> field.
        unpack_code = create_kernel(
            [Assignment(dst_field.center(), buffer.center())],
            data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype},
        )
        unpack_code.compile()(dst_field=dst_arr, buffer=buffer_arr)

        np.testing.assert_equal(src_arr, dst_arr)
def wiener_filtering(complex_field: Field, output_weight_field: Field, sigma):
    """Build assignments that scale every complex coefficient by a Wiener factor.

    For each (stack, patch) entry the squared magnitude of the two components
    on the last index is divided by ``norm_factor`` and turned into the factor
    ``val / (val + sigma**2)``; both components are multiplied by that factor.
    For every stack index the reciprocal of the sum of squared factors
    collected so far is written to ``output_weight_field``.

    :param complex_field: field with three index dimensions
        (stack, patch, component)
    :param output_weight_field: field with one index dimension
    :param sigma: value whose square is added in the factor's denominator
    :return: ``AssignmentCollection`` of all generated assignments
    """
    assert complex_field.index_dimensions == 3
    assert output_weight_field.index_dimensions == 1

    num_stacks, num_patches = complex_field.index_shape[0], complex_field.index_shape[1]
    norm_factor = num_stacks * num_patches

    assignments = []
    # NOTE(review): this list is never reset, so the weight of stack k also
    # contains the factors of stacks 0..k-1 -- confirm that this is intended.
    squared_factors = []

    for stack_index in range(num_stacks):
        for patch_index in range(num_patches):
            components = [complex_field.center(stack_index, patch_index, i) for i in (0, 1)]
            magnitude = sum(c ** 2 for c in components)
            # Implementations differ on whether norm_factor is applied to val or to wien.
            val = magnitude / norm_factor
            wien = val / (val + sigma * sigma)
            squared_factors.append(wien**2)

            for component in components:
                assignments.append(pystencils.Assignment(component, component * wien))

        stack_weight = 1 / sympy.Add(*squared_factors)
        assignments.append(pystencils.Assignment(output_weight_field.center(stack_index), stack_weight))

    return AssignmentCollection(assignments)
def generic_instationary_filter(input_field: pystencils.Field, output_field: pystencils.Field, stencil, weighting_function, normalize_weights=True):
    """Build a filter whose weights depend on position and on function values.

    For every stencil offset the weight is obtained from
    ``weighting_function(offset, input_field[offset], input_field.center())``.

    :param input_field: field that is read at every stencil offset
    :type input_field: pystencils.Field
    :param output_field: field whose center receives the filtered value
    :type output_field: pystencils.Field
    :param stencil: iterable of offsets
    :param weighting_function: A function that takes current offset, offset function value and stencils center
                               function value
    :param normalize_weights: whether or not to normalize weights to a sum of one
    :return: ``AssignmentCollection`` with a single assignment to the output center
    """
    center_value = input_field.center()
    total_weight = 0
    weighted_sum = 0

    for offset in stencil:
        neighbour = input_field[offset]
        weight = weighting_function(offset, neighbour, center_value)
        total_weight += weight
        weighted_sum += weight * neighbour

    if normalize_weights:
        result = weighted_sum / total_weight
    else:
        result = weighted_sum

    return AssignmentCollection({output_field.center(): result})
def apply_wieners(complex_field: Field, wieners: Field, output_weight_field: Field):
    """Build assignments that scale complex coefficients by given Wiener factors.

    Both components of every (stack, patch) entry of ``complex_field`` are
    multiplied by ``wieners(stack, patch)``.  For every stack index the
    reciprocal of the sum of squared factors collected so far is written to
    ``output_weight_field``.

    :param complex_field: field with three index dimensions
        (stack, patch, component)
    :param wieners: field with two index dimensions (stack, patch)
    :param output_weight_field: field with one index dimension
    :return: ``AssignmentCollection`` of all generated assignments
    """
    assert complex_field.index_dimensions == 3
    assert wieners.index_dimensions == 2
    assert output_weight_field.index_dimensions == 1

    num_stacks, num_patches = complex_field.index_shape[0], complex_field.index_shape[1]

    assignments = []
    # NOTE(review): this list is never reset, so the weight of stack k also
    # contains the factors of stacks 0..k-1 -- confirm that this is intended.
    squared_factors = []

    for stack_index in range(num_stacks):
        for patch_index in range(num_patches):
            wien = wieners(stack_index, patch_index)
            squared_factors.append(wien**2)

            for i in (0, 1):
                assignments.append(pystencils.Assignment(
                    complex_field.center(stack_index, patch_index, i),
                    complex_field.center(stack_index, patch_index, i) * wien))

        stack_weight = 1 / sympy.Add(*squared_factors)
        assignments.append(pystencils.Assignment(output_weight_field.center(stack_index), stack_weight))

    return AssignmentCollection(assignments)
def test_jacobi_fixed_field_size():
    """Compare a JIT-compiled 2D Jacobi step with a NumPy reference.

    The fields are created from arrays of shape (30, 20); the reference only
    updates interior cells.
    """
    size = (30, 20)

    src_field_llvm = np.random.rand(*size)
    src_field_py = np.copy(src_field_llvm)
    dst_field_llvm = np.zeros(size)
    dst_field_py = np.zeros(size)

    f = Field.create_from_numpy_array("f", src_field_llvm)
    d = Field.create_from_numpy_array("d", dst_field_llvm)

    jacobi = Assignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
    ast = create_kernel([jacobi])

    # Reference on the interior; neighbours are summed in the order
    # x-1, x+1, y-1, y+1.
    neighbour_sum = (src_field_py[:-2, 1:-1] + src_field_py[2:, 1:-1] +
                     src_field_py[1:-1, :-2] + src_field_py[1:-1, 2:])
    dst_field_py[1:-1, 1:-1] = 0.25 * neighbour_sum

    jit = generate_and_jit(ast)
    jit('kernel', dst_field_llvm, src_field_llvm)

    error = np.sum(np.abs(dst_field_py - dst_field_llvm))
    np.testing.assert_almost_equal(error, 0.0)
def hard_thresholding(complex_field: Field, output_weight_field, threshold):
    """Build assignments that zero all coefficients below a magnitude threshold.

    A (stack, patch) coefficient keeps both of its components when its squared
    magnitude exceeds ``threshold ** 2``; otherwise both become 0.  For every
    stack index the number of kept coefficients of that stack is written to
    ``output_weight_field``.

    :param complex_field: field with three index dimensions
        (stack, patch, component)
    :param output_weight_field: field with one index dimension
    :param threshold: magnitude threshold (compared in squared form)
    :return: ``AssignmentCollection`` of all generated assignments
    """
    assert complex_field.index_dimensions == 3
    assert output_weight_field.index_dimensions == 1

    assignments = []
    squared_threshold = threshold ** 2

    for stack_index in range(complex_field.index_shape[0]):
        kept_flags = []

        for patch_index in range(complex_field.index_shape[1]):
            magnitude = sum(complex_field.center(stack_index, patch_index, i) ** 2 for i in (0, 1))
            is_kept = magnitude > squared_threshold

            for i in (0, 1):
                coefficient = complex_field.center(stack_index, patch_index, i)
                assignments.append(pystencils.Assignment(
                    coefficient,
                    sympy.Piecewise((coefficient, is_kept), (0, True))))

            kept_flags.append(sympy.Piecewise((1, is_kept), (0, True)))

        assignments.append(pystencils.Assignment(
            output_weight_field.center(stack_index), sympy.Add(*kept_flags)))

    return AssignmentCollection(assignments)
def test_jacobi_variable_field_size():
    """Compare a Jacobi step on generic 3D fields with a NumPy reference.

    The kernel is created from generic fields and bound to (3, 3, 3) arrays
    afterwards; the stencil only averages neighbours in the first two
    coordinates.
    """
    size = (3, 3, 3)

    f = Field.create_generic("f", 3)
    d = Field.create_generic("d", 3)
    jacobi = Assignment(d[0, 0, 0], (f[1, 0, 0] + f[-1, 0, 0] + f[0, 1, 0] + f[0, -1, 0]) / 4)
    ast = create_kernel([jacobi])

    src_field_llvm = np.random.rand(*size)
    src_field_py = np.copy(src_field_llvm)
    dst_field_llvm = np.zeros(size)
    dst_field_py = np.zeros(size)

    # Reference on the interior; neighbours are summed in the order
    # x-1, x+1, y-1, y+1.
    neighbour_sum = (src_field_py[:-2, 1:-1, 1:-1] + src_field_py[2:, 1:-1, 1:-1] +
                     src_field_py[1:-1, :-2, 1:-1] + src_field_py[1:-1, 2:, 1:-1])
    dst_field_py[1:-1, 1:-1, 1:-1] = 0.25 * neighbour_sum

    kernel = make_python_function(ast, {'f': src_field_llvm, 'd': dst_field_llvm})
    kernel()

    error = np.sum(np.abs(dst_field_py - dst_field_llvm))
    np.testing.assert_almost_equal(error, 0.0)
def test_variable_sized_fields():
    """Run a four-neighbour average CUDA kernel built from generic 2D fields.

    The result is compared with a ``convolve`` reference computed on the
    array without ghost layers (constant boundary value 0) and padded again.
    """
    src_field = Field.create_generic('src', spatial_dimensions=2)
    dst_field = Field.create_generic('dst', spatial_dimensions=2)

    neighbour_sum = src_field[0, 1] + src_field[0, -1] + src_field[1, 0] + src_field[-1, 0]
    update_rule = Assignment(dst_field[0, 0], neighbour_sum / 4)

    ast = create_cuda_kernel(sympy_cse_on_assignment_list([update_rule]))
    kernel = make_python_function(ast)

    size = (3, 3)
    src_arr = add_ghost_layers(np.random.rand(*size))
    dst_arr = np.zeros_like(src_arr)

    gpu_src_arr = gpuarray.to_gpu(src_arr)
    gpu_dst_arr = gpuarray.to_gpu(dst_arr)
    kernel(src=gpu_src_arr, dst=gpu_dst_arr)
    # Copy the device result back into the host array.
    gpu_dst_arr.get(dst_arr)

    stencil = np.array([[0, 1, 0],
                        [1, 0, 1],
                        [0, 1, 0]]) / 4.0
    interior = remove_ghost_layers(src_arr)
    reference = add_ghost_layers(convolve(interior, stencil, mode='constant', cval=0.0))

    np.testing.assert_almost_equal(reference, dst_arr)
def test_ghost_layer():
    """Check that a CUDA copy kernel leaves the requested ghost layers alone.

    The ghost layers are asymmetric, ``(1, 2)`` along the first and ``(2, 1)``
    along the second coordinate; only the remaining cells may be copied from
    the all-ones source.
    """
    size = (6, 5)
    ghost_layers = [(1, 2), (2, 1)]

    src_arr = np.ones(size)
    dst_arr = np.zeros_like(src_arr)
    src_field = Field.create_from_numpy_array('src', src_arr, index_dimensions=0)
    dst_field = Field.create_from_numpy_array('dst', dst_arr, index_dimensions=0)

    copy_rule = Assignment(dst_field[0, 0], src_field[0, 0])
    ast = create_cuda_kernel([copy_rule], ghost_layers=ghost_layers, indexing_creator=LineIndexing)
    kernel = make_python_function(ast)

    gpu_src_arr = gpuarray.to_gpu(src_arr)
    gpu_dst_arr = gpuarray.to_gpu(dst_arr)
    kernel(src=gpu_src_arr, dst=gpu_dst_arr)
    # Copy the device result back into the host array.
    gpu_dst_arr.get(dst_arr)

    (x_low, x_high), (y_low, y_high) = ghost_layers
    reference = np.zeros_like(src_arr)
    reference[x_low:-x_high, y_low:-y_high] = 1

    np.testing.assert_equal(reference, dst_arr)
def __init__(self, boundary, method, pdf_field_sparse):
    """Rewrite a boundary's equations so they access a sparse pdf field.

    The boundary is evaluated twice: once to collect the offsets of all field
    accesses, and once more against an index field whose dtype has a ``dir``
    entry, one neighbour-index entry per collected offset and the boundary's
    additional data.  Every access to the full pdf field is then replaced by
    an absolute access into ``pdf_field_sparse`` through the matching
    neighbour-index entry.

    :param boundary: callable producing the boundary equations; its
        ``additional_data`` attribute is used to build dtypes
    :param method: passed through to ``boundary``; ``method.dim`` sets the
        spatial dimension of the full pdf field
    :param pdf_field_sparse: field providing ``absolute_access``
    """
    full_pdf_field = Field.create_generic('pdfFull', spatial_dimensions=method.dim, index_dimensions=1)

    # First pass: only needed to find out which offsets the boundary accesses.
    additional_data_field = Field.create_generic('additionalData', spatial_dimensions=1,
                                                 dtype=boundary.additional_data)
    probe_eqs = boundary(full_pdf_field, self.DIR_SYMBOL, method, additional_data_field)
    neighbor_offsets = list({access.offsets for eq in probe_eqs for access in eq.atoms(Field.Access)})

    neighbor_entries = [(self.NEIGHBOR_IDX_NAME.format(i), np.uint32)
                        for i in range(len(neighbor_offsets))]
    index_field_dtype = np.dtype([('dir', np.uint32), *neighbor_entries, *boundary.additional_data])
    index_field = Field.create_generic('indexField', spatial_dimensions=1, dtype=index_field_dtype)

    # Second pass: the equations that are actually kept, built on the index field.
    boundary_eqs = boundary(full_pdf_field, self.DIR_SYMBOL, method, index_field)

    offset_subs = {}
    for i, off in enumerate(neighbor_offsets):
        offset_subs[off] = sp.Symbol(self.NEIGHBOR_IDX_NAME.format(i))

    def to_sparse(eq):
        """Replace every full-pdf-field access in ``eq`` by a sparse access."""
        substitutions = {}
        for access in eq.atoms(Field.Access):
            if access.field == full_pdf_field:
                entry_name = offset_subs[access.offsets].name
                substitutions[access] = pdf_field_sparse.absolute_access([index_field(entry_name)],
                                                                         access.index)
        return eq.subs(substitutions)

    self.boundary_eqs = [to_sparse(eq) for eq in boundary_eqs]
    self.boundary_eqs_orig = boundary_eqs
    self.method = method
    self.index_field_dtype = index_field_dtype
    self.neighbor_offsets = neighbor_offsets
    self.index_field = index_field
def test_fixed_size_mismatch_check():
    """Kernel creation must reject two fixed-size fields of different shape.

    The source array is (20, 21, 9), the destination (21, 21, 9); creating a
    kernel from them is expected to raise a ``ValueError`` that mentions
    differently sized field accesses.
    """
    src_shape, dst_shape = (20, 21, 9), (21, 21, 9)
    sym_src = Field.create_from_numpy_array("src", np.zeros(src_shape), index_dimensions=1)
    sym_dst = Field.create_from_numpy_array("dst", np.zeros(dst_shape), index_dimensions=1)

    rhs = sym_src[-1, 1](1) + sym_src[1, -1](2)
    update_rule = Assignment(sym_dst(0), rhs)

    with pytest.raises(ValueError) as e:
        create_kernel([update_rule])

    assert 'Differently sized field accesses' in str(e.value)
def aggregate(block_scores: Field, patch_input_field: Field, destination_field: Field, block_stencil, matching_stencil, threshold, max_selected, compilation_target, patch_weights: Field = None, accumulated_weights: Field = None, **compilation_kwargs):
    """Compile a kernel that atomically adds selected block values into a field.

    For each entry of ``block_stencil`` an ``atomicAdd`` is emitted that adds
    ``weight * destination_field.center(nth_hit, i)`` to ``patch_input_field``
    at the stencil position shifted by a symbolic offset; with
    ``accumulated_weights`` given, the weight itself is added there as well.
    The kernel body is wrapped in a ``Select`` over ``matching_stencil`` with
    predicate ``block_scores.center(n) < threshold``.

    :param block_scores: field read in the selection predicate
    :param patch_input_field: target of the atomic additions; also provides
        the data type and the number of offset symbols
    :param destination_field: field with two index dimensions whose last
        index extent equals ``len(block_stencil)``
    :param block_stencil: offsets inside one block
    :param matching_stencil: offsets the ``Select`` iterates over
    :param threshold: upper bound used in the selection predicate
    :param max_selected: forwarded to ``Select``
    :param compilation_target: forwarded to ``create_kernel`` and ``Select``
    :param patch_weights: optional field; ``center(nth_hit)`` is used as
        weight, otherwise the weight is 1
    :param accumulated_weights: optional field receiving the added weights
    :param compilation_kwargs: forwarded to ``pystencils.create_kernel``
    :return: the compiled kernel
    """
    # Ghost layers must cover the largest matching offset plus the largest block offset.
    max_offset = max(max(o) for o in matching_stencil)
    max_offset += max(max(o) for o in block_stencil)

    offset = pystencils_reco.typed_symbols('_o:%i' % patch_input_field.spatial_dimensions, 'int32')
    copies = []

    assert destination_field.index_dimensions == 2
    assert destination_field.index_shape[-1] == len(block_stencil)

    n, nth_hit = pystencils_reco.typed_symbols('_n, nth_hit', 'int32')

    for i, stencil_entry in enumerate(block_stencil):
        shifted = tuple(off_sym + delta for off_sym, delta in zip(offset, stencil_entry))
        weight = patch_weights.center(nth_hit) if patch_weights else 1

        # The return value of atomicAdd is not needed, hence the dummy target.
        add_value = pystencils.Assignment(
            _get_dummy_symbol(),
            sympy.Function('atomicAdd')(address_of(patch_input_field[shifted]),
                                        weight * destination_field.center(nth_hit, i)))
        copies.append(add_value)

        if accumulated_weights:
            add_weight = pystencils.Assignment(
                _get_dummy_symbol(),
                sympy.Function('atomicAdd')(address_of(accumulated_weights[shifted]), weight))
            copies.append(add_weight)

    ast = pystencils.create_kernel(AssignmentCollection(copies),
                                   target=compilation_target,
                                   data_type=patch_input_field.dtype,
                                   ghost_layers=max_offset,
                                   **compilation_kwargs)

    ast._body = Select(ast.body,
                       what=offset,
                       from_iterable=matching_stencil,
                       predicate=block_scores.center(n) < threshold,
                       counter_symbol=n,
                       hit_counter_symbol=nth_hit,
                       compilation_target=compilation_target,
                       max_selected=max_selected)

    return ast.compile()
def test_subset_cell_values():
    """Tests (un)packing a subset of the cell values of a field (from)to a GPU buffer."""
    num_cell_values = 7
    # Cell indices of the field to be (un)packed (from)to the buffer
    cell_indices = [1, 3, 5, 6]

    for src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr in _generate_fields(stencil_directions=num_cell_values):
        src_field = Field.create_from_numpy_array("src_field", gpu_src_arr, index_dimensions=1)
        dst_field = Field.create_from_numpy_array("dst_field", gpu_src_arr, index_dimensions=1)
        buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1,
                                      field_type=FieldType.BUFFER, dtype=gpu_src_arr.dtype)

        # Buffer slot k holds the cell value with index cell_indices[k].
        pack_eqs = [Assignment(buffer(buffer_idx), src_field(cell_idx))
                    for buffer_idx, cell_idx in enumerate(cell_indices)]
        pack_code = create_cuda_kernel(
            pack_eqs, type_info={'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype})
        make_python_function(pack_code)(buffer=gpu_buffer_arr, src_field=gpu_src_arr)

        unpack_eqs = [Assignment(dst_field(cell_idx), buffer(buffer_idx))
                      for buffer_idx, cell_idx in enumerate(cell_indices)]
        unpack_code = create_cuda_kernel(
            unpack_eqs, type_info={'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype})
        make_python_function(unpack_code)(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr)

        dst_arr = gpu_dst_arr.get()

        # Wherever source and destination differ, the destination must be 0;
        # everywhere else it must equal the source.
        differs = (src_arr - dst_arr) != 0
        expected = np.ma.masked_where(differs, src_arr).filled(0)
        np.testing.assert_equal(dst_arr, expected)
def test_subset_cell_values():
    """Tests (un)packing a subset of the cell values of a field (from)to a buffer."""
    num_cell_values = 19
    # Cell indices of the field to be (un)packed (from)to the buffer
    cell_indices = [1, 5, 7, 8, 10, 12, 13]

    for src_arr, dst_arr, bufferArr in _generate_fields(num_directions=num_cell_values):
        src_field = Field.create_from_numpy_array("src_field", src_arr, index_dimensions=1)
        dst_field = Field.create_from_numpy_array("dst_field", dst_arr, index_dimensions=1)
        buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1,
                                      field_type=FieldType.BUFFER, dtype=src_arr.dtype)

        # Buffer slot k holds the cell value with index cell_indices[k].
        pack_eqs = [Assignment(buffer(buffer_idx), src_field(cell_idx))
                    for buffer_idx, cell_idx in enumerate(cell_indices)]
        pack_code = create_kernel(
            pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
        pack_code.compile()(buffer=bufferArr, src_field=src_arr)

        unpack_eqs = [Assignment(dst_field(cell_idx), buffer(buffer_idx))
                      for buffer_idx, cell_idx in enumerate(cell_indices)]
        unpack_code = create_kernel(
            unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
        unpack_code.compile()(buffer=bufferArr, dst_field=dst_arr)

        # Wherever source and destination differ, the destination must be 0;
        # everywhere else it must equal the source.
        differs = (src_arr - dst_arr) != 0
        expected = np.ma.masked_where(differs, src_arr).filled(0)
        np.testing.assert_equal(dst_arr, expected)
def test_fixed_and_variable_field_check():
    """Kernel creation must reject mixing a fixed-size and a generic field.

    The source field is created from a (20, 21, 9) array, the destination is
    a generic field; ``create_kernel`` is expected to raise a ``ValueError``
    that mentions the mix of fixed-shaped and variable-shape fields.
    """
    sym_src = Field.create_from_numpy_array("src", np.zeros((20, 21, 9)), index_dimensions=1)
    sym_dst = Field.create_generic("dst", spatial_dimensions=2, index_dimensions=1)

    rhs = sym_src[-1, 1](1) + sym_src[1, -1](2)
    update_rule = Assignment(sym_dst(0), rhs)

    with pytest.raises(ValueError) as e:
        create_kernel(update_rule)

    assert 'Mixing fixed-shaped and variable-shape fields' in str(e.value)
def test_fixed_size_mismatch_check():
    """Kernel creation must reject two fixed-size fields of different shape.

    The source array is (20, 21, 9), the destination (21, 21, 9); creating a
    kernel from them is expected to raise a ``ValueError``.
    """
    src = np.zeros((20, 21, 9))
    dst = np.zeros((21, 21, 9))
    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
    sym_dst = Field.create_from_numpy_array("dst", dst, index_dimensions=1)
    update_rule = Assignment(sym_dst(0), sym_src[-1, 1](1) + sym_src[1, -1](2))

    try:
        create_kernel([update_rule])
    except ValueError:
        pass
    else:
        # Raised explicitly instead of ``assert False``: assert statements are
        # removed under ``python -O``, which would let the test pass silently.
        raise AssertionError("Expected ValueError because fields with different sized where passed")
def test_all_cell_values():
    """Tests (un)packing all cell values of a field (from)to a GPU buffer."""
    num_cell_values = 7

    for src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr in _generate_fields(stencil_directions=num_cell_values):
        src_field = Field.create_from_numpy_array("src_field", gpu_src_arr, index_dimensions=1)
        dst_field = Field.create_from_numpy_array("dst_field", gpu_src_arr, index_dimensions=1)
        buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1,
                                      field_type=FieldType.BUFFER, dtype=gpu_src_arr.dtype)

        # All cell values are packed, so buffer index and field index coincide.
        pack_eqs = [Assignment(buffer(idx), src_field(idx)) for idx in range(num_cell_values)]
        pack_code = create_cuda_kernel(
            pack_eqs, type_info={'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype})
        make_python_function(pack_code)(buffer=gpu_buffer_arr, src_field=gpu_src_arr)

        unpack_eqs = [Assignment(dst_field(idx), buffer(idx)) for idx in range(num_cell_values)]
        unpack_code = create_cuda_kernel(
            unpack_eqs, type_info={'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype})
        make_python_function(unpack_code)(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr)

        np.testing.assert_equal(src_arr, gpu_dst_arr.get())
def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, index_dim_shape=1, dtype=np.float64):
    """Copies a rectangular part of an array to another non-overlapping part

    :param domain_size: sequence whose length gives the number of spatial
        dimensions of the generic field
    :param from_slice: slice that is read
    :param to_slice: slice that is written; also used as iteration slice
    :param index_dimensions: number of index dimensions of the field
    :param index_dim_shape: extent of the index dimension(s); a single value
        when ``index_dimensions < 2``, otherwise an iterable of extents
    :param dtype: data type of the field
    :return: AST of the CUDA kernel
    """
    f = Field.create_generic("pdfs", len(domain_size), index_dimensions=index_dimensions, dtype=dtype)
    normalized_from_slice = normalize_slice(from_slice, f.spatial_shape)
    normalized_to_slice = normalize_slice(to_slice, f.spatial_shape)

    slice_pairs = list(zip(normalized_from_slice, normalized_to_slice))
    offset = [src.start - dst.start for src, dst in slice_pairs]
    # Equal start and stop differences mean both slices have the same extent.
    assert offset == [src.stop - dst.stop for src, dst in slice_pairs], \
        "Slices have to have same size"

    if index_dimensions < 2:
        index_dim_shape = [index_dim_shape]

    shift = tuple(offset)
    update_eqs = [Assignment(f(*idx), f[shift](*idx))
                  for idx in product(*map(range, index_dim_shape))]

    return create_cuda_kernel(update_eqs, iteration_slice=to_slice, skip_independence_check=True)
def test_fixed_and_variable_field_check():
    """Kernel creation must reject mixing a fixed-size and a generic field.

    The source field is created from a (20, 21, 9) array, the destination is
    a generic field; ``create_kernel`` is expected to raise a ``ValueError``.
    """
    src = np.zeros((20, 21, 9))
    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
    sym_dst = Field.create_generic("dst", spatial_dimensions=2, index_dimensions=1)
    update_rule = Assignment(sym_dst(0), sym_src[-1, 1](1) + sym_src[1, -1](2))

    try:
        create_kernel([update_rule])
    except ValueError:
        pass
    else:
        # Raised explicitly instead of ``assert False``: assert statements are
        # removed under ``python -O``, which would let the test pass silently.
        raise AssertionError("Expected ValueError because fields with different sized where passed")
def extend_to_size_of_other_field(this_field: pystencils.Field, other_field: pystencils.Field):
    """Set ``this_field.coordinate_transform`` to a diagonal scaling matrix.

    The diagonal entry for coordinate ``i`` is
    ``this_field.spatial_shape[i] / other_field.spatial_shape[i]``.  The
    function modifies ``this_field`` in place and returns nothing.

    :param this_field: field whose ``coordinate_transform`` is overwritten
    :param other_field: field providing the reference spatial shape
    """
    num_dims = len(this_field.spatial_shape)
    scale_factors = []
    for i in range(num_dims):
        scale_factors.append(this_field.spatial_shape[i] / other_field.spatial_shape[i])

    this_field.coordinate_transform = sp.DiagMatrix(sp.Matrix(scale_factors))
def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, index_dim_shape=1, dtype=np.float64):
    """Copies a rectangular part of an array to another non-overlapping part

    :param domain_size: sequence whose length gives the number of spatial
        dimensions of the generic field
    :param from_slice: slice that is read
    :param to_slice: slice that is written; also used as iteration slice
    :param index_dimensions: number of index dimensions, 0 or 1
    :param index_dim_shape: number of index values that are copied
    :param dtype: data type of the field
    :return: Python-callable CUDA kernel
    :raises NotImplementedError: if ``index_dimensions`` is neither 0 nor 1
    """
    if index_dimensions not in (0, 1):
        raise NotImplementedError("Works only for one or zero index coordinates")

    f = Field.create_generic("pdfs", len(domain_size), index_dimensions=index_dimensions, dtype=dtype)
    normalized_from_slice = normalize_slice(from_slice, f.spatial_shape)
    normalized_to_slice = normalize_slice(to_slice, f.spatial_shape)

    slice_pairs = list(zip(normalized_from_slice, normalized_to_slice))
    offset = [src.start - dst.start for src, dst in slice_pairs]
    # Equal start and stop differences mean both slices have the same extent.
    assert offset == [src.stop - dst.stop for src, dst in slice_pairs], \
        "Slices have to have same size"

    shift = tuple(offset)
    update_eqs = [Assignment(f(i), f[shift](i)) for i in range(index_dim_shape)]

    ast = create_cuda_kernel(update_eqs, iteration_slice=to_slice, skip_independence_check=True)
    return make_python_function(ast)
def generic_stationary_filter(input_field: pystencils.Field, output_field: pystencils.Field, stencil, weighting_function, normalize_weights=True):
    """Build a filter whose weights depend only on the stencil offset.

    :param input_field: field that is read at every stencil offset
    :type input_field: pystencils.Field
    :param output_field: field whose center receives the filtered value
    :type output_field: pystencils.Field
    :param stencil: iterable of offsets
    :param weighting_function: A function that takes a offset tuple and transfers it to weighting of the function value
    :param normalize_weights: whether or not to normalize weights to a sum of one
    :return: ``AssignmentCollection`` with a single assignment to the output center
    """
    total_weight = 0
    weighted_sum = 0

    for offset in stencil:
        weight = weighting_function(offset)
        total_weight += weight
        weighted_sum += weight * input_field[offset]

    if normalize_weights:
        result = weighted_sum / total_weight
    else:
        result = weighted_sum

    return AssignmentCollection({output_field.center(): result})
def test_all_cell_values():
    """Tests (un)packing all cell values of a field (from)to a buffer."""
    num_cell_values = 19

    for src_arr, dst_arr, bufferArr in _generate_fields(num_directions=num_cell_values):
        src_field = Field.create_from_numpy_array("src_field", src_arr, index_dimensions=1)
        dst_field = Field.create_from_numpy_array("dst_field", dst_arr, index_dimensions=1)
        buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1,
                                      field_type=FieldType.BUFFER, dtype=src_arr.dtype)

        # All cell values are packed, so buffer index and field index coincide.
        pack_eqs = [Assignment(buffer(idx), src_field(idx)) for idx in range(num_cell_values)]
        pack_code = create_kernel(
            pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
        pack_code.compile()(buffer=bufferArr, src_field=src_arr)

        unpack_eqs = [Assignment(dst_field(idx), buffer(idx)) for idx in range(num_cell_values)]
        unpack_code = create_kernel(
            unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
        unpack_code.compile()(buffer=bufferArr, dst_field=dst_arr)

        np.testing.assert_equal(src_arr, dst_arr)
def test_field_layouts():
    """Run the GPU pack/unpack kernels for several memory layouts.

    NOTE(review): the unpacked result is never compared with the source, so
    this only checks that kernel creation and launch do not raise -- confirm
    whether a final comparison was intended.
    """
    num_cell_values = 7
    layouts = ['numpy', 'fzyx', 'zyxf', 'reverse_numpy']

    for layout_str in layouts:
        fields = _generate_fields(stencil_directions=num_cell_values, layout=layout_str)
        for src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr in fields:
            src_field = Field.create_from_numpy_array("src_field", gpu_src_arr, index_dimensions=1)
            dst_field = Field.create_from_numpy_array("dst_field", gpu_src_arr, index_dimensions=1)
            buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1,
                                          field_type=FieldType.BUFFER, dtype=src_arr.dtype)

            # All cell values are packed, so buffer index and field index coincide.
            pack_eqs = [Assignment(buffer(idx), src_field(idx)) for idx in range(num_cell_values)]
            pack_code = create_cuda_kernel(
                pack_eqs, type_info={'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype})
            make_python_function(pack_code)(buffer=gpu_buffer_arr, src_field=gpu_src_arr)

            unpack_eqs = [Assignment(dst_field(idx), buffer(idx)) for idx in range(num_cell_values)]
            unpack_code = create_cuda_kernel(
                unpack_eqs, type_info={'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype})
            make_python_function(unpack_code)(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr)
def mean_filter(input_field: pystencils.Field, output_field: pystencils.Field, stencil):
    """Build the assignment for an unweighted mean over a stencil.

    :param input_field: field that is read at every stencil offset
    :param output_field: field whose center receives the mean
    :param stencil: sized collection of offsets
    :return: dict mapping ``output_field.center()`` to the sum of the input
        accesses divided by ``len(stencil)``
    """
    neighbour_total = sum(input_field[offset] for offset in stencil)
    return {output_field.center(): neighbour_total / len(stencil)}
def test_field_slice():
    """Tests (un)packing slices of a scalar field (from)to a GPU buffer.

    For each direction the slice before the ghost layer is packed into the
    buffer and unpacked into the ghost region slice of the destination.
    """
    fields = _generate_fields()
    directions = ['N', 'S', 'NW', 'SW', 'TNW', 'B']

    for d in directions:
        for src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr in fields:
            # Slices belonging to the current direction.
            slice_dir = direction_string_to_offset(d, dim=len(src_arr.shape))
            pack_slice = get_slice_before_ghost_layer(slice_dir)
            unpack_slice = get_ghost_region_slice(slice_dir)

            src_field = Field.create_from_numpy_array("src_field", src_arr[pack_slice])
            dst_field = Field.create_from_numpy_array("dst_field", src_arr[unpack_slice])
            buffer = Field.create_generic("buffer", spatial_dimensions=1,
                                          field_type=FieldType.BUFFER, dtype=src_arr.dtype)

            # Pack the slice next to the ghost layer into the buffer.
            pack_code = create_cuda_kernel(
                [Assignment(buffer.center(), src_field.center())],
                type_info={'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype})
            make_python_function(pack_code)(buffer=gpu_buffer_arr, src_field=gpu_src_arr[pack_slice])

            # Unpack the buffer into the ghost region of the destination.
            unpack_code = create_cuda_kernel(
                [Assignment(dst_field.center(), buffer.center())],
                type_info={'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype})
            make_python_function(unpack_code)(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr[unpack_slice])

            dst_arr = gpu_dst_arr.get()
            np.testing.assert_equal(src_arr[pack_slice], dst_arr[unpack_slice])