Python create_cuda_kernel Examples, pystencils.gpucuda.create_cuda_kernel Python Examples

Example #1

0

Show file

def test_full_scalar_field():
    """Tests fully (un)packing a scalar field (from)to a GPU buffer."""
    fields = _generate_fields()
    for (src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr) in fields:
        src_field = Field.create_from_numpy_array("src_field", src_arr)
        dst_field = Field.create_from_numpy_array("dst_field", src_arr)
        buffer = Field.create_generic("buffer",
                                      spatial_dimensions=1,
                                      field_type=FieldType.BUFFER,
                                      dtype=src_arr.dtype)

        pack_eqs = [Assignment(buffer.center(), src_field.center())]
        pack_types = {
            'src_field': gpu_src_arr.dtype,
            'buffer': gpu_buffer_arr.dtype
        }
        pack_code = create_cuda_kernel(pack_eqs, type_info=pack_types)

        pack_kernel = make_python_function(pack_code)
        pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr)

        unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
        unpack_types = {
            'dst_field': gpu_dst_arr.dtype,
            'buffer': gpu_buffer_arr.dtype
        }
        unpack_code = create_cuda_kernel(unpack_eqs, type_info=unpack_types)

        unpack_kernel = make_python_function(unpack_code)
        unpack_kernel(dst_field=gpu_dst_arr, buffer=gpu_buffer_arr)

        dst_arr = gpu_dst_arr.get()

        np.testing.assert_equal(src_arr, dst_arr)

Example #2

0

Show file

def test_subset_cell_values():
    """Tests (un)packing a subset of cell values of the a field (from)to a buffer."""
    num_cell_values = 7
    # Cell indices of the field to be (un)packed (from)to the buffer
    cell_indices = [1, 3, 5, 6]
    fields = _generate_fields(stencil_directions=num_cell_values)
    for (src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr) in fields:
        src_field = Field.create_from_numpy_array("src_field",
                                                  gpu_src_arr,
                                                  index_dimensions=1)
        dst_field = Field.create_from_numpy_array("dst_field",
                                                  gpu_src_arr,
                                                  index_dimensions=1)
        buffer = Field.create_generic("buffer",
                                      spatial_dimensions=1,
                                      index_dimensions=1,
                                      field_type=FieldType.BUFFER,
                                      dtype=gpu_src_arr.dtype)

        pack_eqs = []
        # Since we are packing all cell values for all cells, then
        # the buffer index is equivalent to the field index
        for buffer_idx, cell_idx in enumerate(cell_indices):
            eq = Assignment(buffer(buffer_idx), src_field(cell_idx))
            pack_eqs.append(eq)

        pack_types = {
            'src_field': gpu_src_arr.dtype,
            'buffer': gpu_buffer_arr.dtype
        }
        pack_code = create_cuda_kernel(pack_eqs, type_info=pack_types)
        pack_kernel = make_python_function(pack_code)
        pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr)

        unpack_eqs = []

        for buffer_idx, cell_idx in enumerate(cell_indices):
            eq = Assignment(dst_field(cell_idx), buffer(buffer_idx))
            unpack_eqs.append(eq)

        unpack_types = {
            'dst_field': gpu_dst_arr.dtype,
            'buffer': gpu_buffer_arr.dtype
        }
        unpack_code = create_cuda_kernel(unpack_eqs, type_info=unpack_types)
        unpack_kernel = make_python_function(unpack_code)
        unpack_kernel(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr)

        dst_arr = gpu_dst_arr.get()

        mask_arr = np.ma.masked_where((src_arr - dst_arr) != 0, src_arr)
        np.testing.assert_equal(dst_arr, mask_arr.filled(int(0)))

Example #3

0

Show file

def test_all_cell_values():
    """Tests (un)packing all cell values of the a field (from)to a buffer."""
    num_cell_values = 7
    fields = _generate_fields(stencil_directions=num_cell_values)
    for (src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr) in fields:
        src_field = Field.create_from_numpy_array("src_field",
                                                  gpu_src_arr,
                                                  index_dimensions=1)
        dst_field = Field.create_from_numpy_array("dst_field",
                                                  gpu_src_arr,
                                                  index_dimensions=1)
        buffer = Field.create_generic("buffer",
                                      spatial_dimensions=1,
                                      index_dimensions=1,
                                      field_type=FieldType.BUFFER,
                                      dtype=gpu_src_arr.dtype)

        pack_eqs = []
        # Since we are packing all cell values for all cells, then
        # the buffer index is equivalent to the field index
        for idx in range(num_cell_values):
            eq = Assignment(buffer(idx), src_field(idx))
            pack_eqs.append(eq)

        pack_types = {
            'src_field': gpu_src_arr.dtype,
            'buffer': gpu_buffer_arr.dtype
        }
        pack_code = create_cuda_kernel(pack_eqs, type_info=pack_types)
        pack_kernel = make_python_function(pack_code)
        pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr)

        unpack_eqs = []

        for idx in range(num_cell_values):
            eq = Assignment(dst_field(idx), buffer(idx))
            unpack_eqs.append(eq)

        unpack_types = {
            'dst_field': gpu_dst_arr.dtype,
            'buffer': gpu_buffer_arr.dtype
        }
        unpack_code = create_cuda_kernel(unpack_eqs, type_info=unpack_types)
        unpack_kernel = make_python_function(unpack_code)
        unpack_kernel(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr)

        dst_arr = gpu_dst_arr.get()

        np.testing.assert_equal(src_arr, dst_arr)

Example #4

0

Show file

def test_variable_sized_fields():
    src_field = Field.create_generic('src', spatial_dimensions=2)
    dst_field = Field.create_generic('dst', spatial_dimensions=2)

    update_rule = Assignment(dst_field[0, 0],
                             (src_field[0, 1] + src_field[0, -1] +
                              src_field[1, 0] + src_field[-1, 0]) / 4)

    ast = create_cuda_kernel(sympy_cse_on_assignment_list([update_rule]))
    kernel = make_python_function(ast)

    size = (3, 3)
    src_arr = np.random.rand(*size)
    src_arr = add_ghost_layers(src_arr)
    dst_arr = np.zeros_like(src_arr)

    gpu_src_arr = gpuarray.to_gpu(src_arr)
    gpu_dst_arr = gpuarray.to_gpu(dst_arr)
    kernel(src=gpu_src_arr, dst=gpu_dst_arr)
    gpu_dst_arr.get(dst_arr)

    stencil = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]]) / 4.0
    reference = convolve(remove_ghost_layers(src_arr),
                         stencil,
                         mode='constant',
                         cval=0.0)
    reference = add_ghost_layers(reference)
    np.testing.assert_almost_equal(reference, dst_arr)

Example #5

0

Show file

def test_ghost_layer():
    size = (6, 5)
    src_arr = np.ones(size)
    dst_arr = np.zeros_like(src_arr)
    src_field = Field.create_from_numpy_array('src',
                                              src_arr,
                                              index_dimensions=0)
    dst_field = Field.create_from_numpy_array('dst',
                                              dst_arr,
                                              index_dimensions=0)

    update_rule = Assignment(dst_field[0, 0], src_field[0, 0])
    ghost_layers = [(1, 2), (2, 1)]
    ast = create_cuda_kernel([update_rule],
                             ghost_layers=ghost_layers,
                             indexing_creator=LineIndexing)
    kernel = make_python_function(ast)

    gpu_src_arr = gpuarray.to_gpu(src_arr)
    gpu_dst_arr = gpuarray.to_gpu(dst_arr)
    kernel(src=gpu_src_arr, dst=gpu_dst_arr)
    gpu_dst_arr.get(dst_arr)

    reference = np.zeros_like(src_arr)
    reference[ghost_layers[0][0]:-ghost_layers[0][1],
              ghost_layers[1][0]:-ghost_layers[1][1]] = 1
    np.testing.assert_equal(reference, dst_arr)

Example #6

0

Show file

def test_field_layouts():
    num_cell_values = 7
    for layout_str in ['numpy', 'fzyx', 'zyxf', 'reverse_numpy']:
        fields = _generate_fields(stencil_directions=num_cell_values,
                                  layout=layout_str)
        for (src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr) in fields:
            src_field = Field.create_from_numpy_array("src_field",
                                                      gpu_src_arr,
                                                      index_dimensions=1)
            dst_field = Field.create_from_numpy_array("dst_field",
                                                      gpu_src_arr,
                                                      index_dimensions=1)
            buffer = Field.create_generic("buffer",
                                          spatial_dimensions=1,
                                          index_dimensions=1,
                                          field_type=FieldType.BUFFER,
                                          dtype=src_arr.dtype)

            pack_eqs = []
            # Since we are packing all cell values for all cells, then
            # the buffer index is equivalent to the field index
            for idx in range(num_cell_values):
                eq = Assignment(buffer(idx), src_field(idx))
                pack_eqs.append(eq)

            pack_types = {
                'src_field': gpu_src_arr.dtype,
                'buffer': gpu_buffer_arr.dtype
            }
            pack_code = create_cuda_kernel(pack_eqs, type_info=pack_types)
            pack_kernel = make_python_function(pack_code)
            pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr)

            unpack_eqs = []

            for idx in range(num_cell_values):
                eq = Assignment(dst_field(idx), buffer(idx))
                unpack_eqs.append(eq)

            unpack_types = {
                'dst_field': gpu_dst_arr.dtype,
                'buffer': gpu_buffer_arr.dtype
            }
            unpack_code = create_cuda_kernel(unpack_eqs,
                                             type_info=unpack_types)
            unpack_kernel = make_python_function(unpack_code)
            unpack_kernel(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr)

Example #7

0

Show file

def test_field_slice():
    """Tests (un)packing slices of a scalar field (from)to a buffer."""
    fields = _generate_fields()
    for d in ['N', 'S', 'NW', 'SW', 'TNW', 'B']:
        for (src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr) in fields:
            # Extract slice from N direction of the field
            slice_dir = direction_string_to_offset(d, dim=len(src_arr.shape))
            pack_slice = get_slice_before_ghost_layer(slice_dir)
            unpack_slice = get_ghost_region_slice(slice_dir)

            src_field = Field.create_from_numpy_array("src_field",
                                                      src_arr[pack_slice])
            dst_field = Field.create_from_numpy_array("dst_field",
                                                      src_arr[unpack_slice])
            buffer = Field.create_generic("buffer",
                                          spatial_dimensions=1,
                                          field_type=FieldType.BUFFER,
                                          dtype=src_arr.dtype)

            pack_eqs = [Assignment(buffer.center(), src_field.center())]
            pack_types = {
                'src_field': gpu_src_arr.dtype,
                'buffer': gpu_buffer_arr.dtype
            }
            pack_code = create_cuda_kernel(pack_eqs, type_info=pack_types)

            pack_kernel = make_python_function(pack_code)
            pack_kernel(buffer=gpu_buffer_arr,
                        src_field=gpu_src_arr[pack_slice])

            # Unpack into ghost layer of dst_field in N direction
            unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
            unpack_types = {
                'dst_field': gpu_dst_arr.dtype,
                'buffer': gpu_buffer_arr.dtype
            }
            unpack_code = create_cuda_kernel(unpack_eqs,
                                             type_info=unpack_types)

            unpack_kernel = make_python_function(unpack_code)
            unpack_kernel(buffer=gpu_buffer_arr,
                          dst_field=gpu_dst_arr[unpack_slice])

            dst_arr = gpu_dst_arr.get()

            np.testing.assert_equal(src_arr[pack_slice], dst_arr[unpack_slice])

Example #8

0

Show file

def test_setting_value():
    arr_cpu = np.arange(25, dtype=np.float64).reshape(5, 5)
    arr_gpu = gpuarray.to_gpu(arr_cpu)

    iteration_slice = make_slice[:, :]
    f = Field.create_generic("f", 2)
    update_rule = [Assignment(f(0), sp.Symbol("value"))]
    ast = create_cuda_kernel(update_rule,
                             iteration_slice=iteration_slice,
                             indexing_creator=LineIndexing)

    kernel = make_python_function(ast)
    kernel(f=arr_gpu, value=np.float64(42.0))
    np.testing.assert_equal(arr_gpu.get(), np.ones((5, 5)) * 42.0)

Example #9

0

Show file

def test_multiple_index_dimensions():
    """Sums along the last axis of a numpy array"""
    src_size = (7, 6, 4)
    dst_size = src_size[:2]
    src_arr = np.asfortranarray(np.random.rand(*src_size))
    dst_arr = np.zeros(dst_size)

    src_field = Field.create_from_numpy_array('src',
                                              src_arr,
                                              index_dimensions=1)
    dst_field = Field.create_from_numpy_array('dst',
                                              dst_arr,
                                              index_dimensions=0)

    offset = (-2, -1)
    update_rule = Assignment(
        dst_field[0, 0],
        sum([src_field[offset[0], offset[1]](i) for i in range(src_size[-1])]))

    ast = create_cuda_kernel([update_rule])
    kernel = make_python_function(ast)

    gpu_src_arr = gpuarray.to_gpu(src_arr)
    gpu_dst_arr = gpuarray.to_gpu(dst_arr)
    kernel(src=gpu_src_arr, dst=gpu_dst_arr)
    gpu_dst_arr.get(dst_arr)

    reference = np.zeros_like(dst_arr)
    gl = np.max(np.abs(np.array(offset, dtype=int)))
    for x in range(gl, src_size[0] - gl):
        for y in range(gl, src_size[1] - gl):
            reference[x, y] = sum([
                src_arr[x + offset[0], y + offset[1], i]
                for i in range(src_size[2])
            ])

    np.testing.assert_almost_equal(reference, dst_arr)

Example #10

0

Show file

def create_kernel(assignments,
                  target='cpu',
                  data_type="double",
                  iteration_slice=None,
                  ghost_layers=None,
                  skip_independence_check=False,
                  cpu_openmp=False,
                  cpu_vectorize_info=None,
                  cpu_blocking=None,
                  gpu_indexing='block',
                  gpu_indexing_params=MappingProxyType({})):
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.

    Args:
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
        target: 'cpu', 'llvm' or 'gpu'
        data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
                  to type
        iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
                         part of the field is iterated over
        ghost_layers: if left to default, the number of necessary ghost layers is determined automatically
                     a single integer specifies the ghost layer count at all borders, can also be a sequence of
                     pairs ``[(x_lower_gl, x_upper_gl), .... ]``
        skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                 periodicity kernel, that access the field outside the iteration bounds. Use with care!
        cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
        cpu_vectorize_info: a dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal'
                            for documentation of these parameters see vectorize function. Example:
                            '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
        cpu_blocking: a tuple of block sizes or None if no blocking should be applied
        gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
        gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
                             e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'

    Returns:
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
        can be compiled with through its 'compile()' member

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> ast = ps.create_kernel(assignment, target='cpu', cpu_openmp=True)
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
    """
    # ----  Normalizing parameters
    split_groups = ()
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments
    if isinstance(assignments, Assignment):
        assignments = [assignments]

    # ----  Creating ast
    if target == 'cpu':
        from pystencils.cpu import create_kernel
        from pystencils.cpu import add_openmp
        ast = create_kernel(assignments,
                            type_info=data_type,
                            split_groups=split_groups,
                            iteration_slice=iteration_slice,
                            ghost_layers=ghost_layers,
                            skip_independence_check=skip_independence_check)
        omp_collapse = None
        if cpu_blocking:
            omp_collapse = loop_blocking(ast, cpu_blocking)
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse)
        if cpu_vectorize_info:
            if cpu_vectorize_info is True:
                vectorize(ast)
            elif isinstance(cpu_vectorize_info, dict):
                vectorize(ast, **cpu_vectorize_info)
            else:
                raise ValueError("Invalid value for cpu_vectorize_info")
        return ast
    elif target == 'llvm':
        from pystencils.llvm import create_kernel
        ast = create_kernel(assignments,
                            type_info=data_type,
                            split_groups=split_groups,
                            iteration_slice=iteration_slice,
                            ghost_layers=ghost_layers)
        return ast
    elif target == 'gpu':
        from pystencils.gpucuda import create_cuda_kernel
        ast = create_cuda_kernel(
            assignments,
            type_info=data_type,
            indexing_creator=indexing_creator_from_params(
                gpu_indexing, gpu_indexing_params),
            iteration_slice=iteration_slice,
            ghost_layers=ghost_layers,
            skip_independence_check=skip_independence_check)
        return ast
    else:
        raise ValueError(
            "Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " %
            (target, ))

Example #11

0

Show file

def create_domain_kernel(assignments: List[Assignment], *,
                         config: CreateKernelConfig):
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.

    Args:
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
        config: CreateKernelConfig which includes the needed configuration

    Returns:
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
        can be compiled with through its 'compile()' member

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> kernel_config = ps.CreateKernelConfig(cpu_openmp=True)
        >>> kernel_ast = ps.kernelcreation.create_domain_kernel([assignment], config=kernel_config)
        >>> kernel = kernel_ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
    """
    # --- applying first default simplifications
    try:
        if config.default_assignment_simplifications and isinstance(
                assignments, AssignmentCollection):
            simplification = create_simplification_strategy()
            assignments = simplification(assignments)
    except Exception as e:
        warnings.warn(
            f"It was not possible to apply the default pystencils optimisations to the "
            f"AssignmentCollection due to the following problem :{e}")

    # ----  Normalizing parameters
    split_groups = ()
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments

    try:
        if config.default_assignment_simplifications:
            assignments = apply_sympy_optimisations(assignments)
    except Exception as e:
        warnings.warn(
            f"It was not possible to apply the default SymPy optimisations to the "
            f"Assignments due to the following problem :{e}")

    # ----  Creating ast
    ast = None
    if config.target == Target.CPU:
        if config.backend == Backend.C:
            from pystencils.cpu import add_openmp, create_kernel
            ast = create_kernel(
                assignments,
                function_name=config.function_name,
                type_info=config.data_type,
                split_groups=split_groups,
                iteration_slice=config.iteration_slice,
                ghost_layers=config.ghost_layers,
                skip_independence_check=config.skip_independence_check)
            for optimization in config.cpu_prepend_optimizations:
                optimization(ast)
            omp_collapse = None
            if config.cpu_blocking:
                omp_collapse = loop_blocking(ast, config.cpu_blocking)
            if config.cpu_openmp:
                add_openmp(ast,
                           num_threads=config.cpu_openmp,
                           collapse=omp_collapse,
                           assume_single_outer_loop=config.omp_single_loop)
            if config.cpu_vectorize_info:
                if config.cpu_vectorize_info is True:
                    vectorize(ast)
                elif isinstance(config.cpu_vectorize_info, dict):
                    vectorize(ast, **config.cpu_vectorize_info)
                    if config.cpu_openmp and config.cpu_blocking and 'nontemporal' in config.cpu_vectorize_info and \
                            config.cpu_vectorize_info['nontemporal'] and 'cachelineZero' in ast.instruction_set:
                        # This condition is stricter than it needs to be: if blocks along the fastest axis start on a
                        # cache line boundary, it's okay. But we cannot determine that here.
                        # We don't need to disallow OpenMP collapsing because it is never applied to the inner loop.
                        raise ValueError(
                            "Blocking cannot be combined with cacheline-zeroing"
                        )
                else:
                    raise ValueError("Invalid value for cpu_vectorize_info")
    elif config.target == Target.GPU:
        if config.backend == Backend.CUDA:
            from pystencils.gpucuda import create_cuda_kernel
            ast = create_cuda_kernel(
                assignments,
                function_name=config.function_name,
                type_info=config.data_type,
                indexing_creator=indexing_creator_from_params(
                    config.gpu_indexing, config.gpu_indexing_params),
                iteration_slice=config.iteration_slice,
                ghost_layers=config.ghost_layers,
                skip_independence_check=config.skip_independence_check)

    if not ast:
        raise NotImplementedError(
            f'{config.target} together with {config.backend} is not supported by `create_domain_kernel`'
        )

    if config.use_auto_for_assignments:
        for a in ast.atoms(SympyAssignment):
            a.use_auto = True

    return ast

Example #12

0

Show file

File: macroscopic_value_kernels.py Project: mabau/lbmpy

def compile_macroscopic_values_getter(lb_method,
                                      output_quantities,
                                      pdf_arr=None,
                                      ghost_layers=1,
                                      iteration_slice=None,
                                      field_layout='numpy',
                                      target=Target.CPU,
                                      streaming_pattern='pull',
                                      previous_timestep=Timestep.BOTH):
    """
    Create kernel to compute macroscopic value(s) from a pdf field (e.g. density or velocity)

    Args:
        lb_method: instance of :class:`lbmpy.methods.AbstractLbMethod`
        output_quantities: sequence of quantities to compute e.g. ['density', 'velocity']
        pdf_arr: optional numpy array for pdf field - used to get optimal loop structure for kernel
        ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
                      that should be excluded from the iteration. If None, the number of ghost layers 
                      is determined automatically and assumed to be equal for all dimensions.        
        iteration_slice: if not None, iteration is done only over this slice of the field
        field_layout: layout for output field, also used for pdf field if pdf_arr is not given
        target: `Target.CPU` or `Target.GPU`
        previous_step_accessor: The accessor used by the streaming pattern of the previous timestep

    Returns:
        a function to compute macroscopic values:
        - pdf_array
        - keyword arguments from name of conserved quantity (as in output_quantities) to numpy field
    """
    if not (isinstance(output_quantities, list)
            or isinstance(output_quantities, tuple)):
        output_quantities = [output_quantities]

    cqc = lb_method.conserved_quantity_computation
    unknown_quantities = [
        oq for oq in output_quantities if oq not in cqc.conserved_quantities
    ]
    if unknown_quantities:
        raise ValueError(
            "No such conserved quantity: %s, conserved quantities are %s" %
            (str(unknown_quantities), str(cqc.conserved_quantities.keys())))

    if pdf_arr is None:
        pdf_field = Field.create_generic('pdfs',
                                         lb_method.dim,
                                         index_dimensions=1,
                                         layout=field_layout)
    else:
        pdf_field = Field.create_from_numpy_array('pdfs',
                                                  pdf_arr,
                                                  index_dimensions=1)

    output_mapping = {}
    for output_quantity in output_quantities:
        number_of_elements = cqc.conserved_quantities[output_quantity]
        assert number_of_elements >= 1

        ind_dims = 0 if number_of_elements <= 1 else 1
        if pdf_arr is None:
            output_field = Field.create_generic(output_quantity,
                                                lb_method.dim,
                                                layout=field_layout,
                                                index_dimensions=ind_dims)
        else:
            output_field_shape = pdf_arr.shape[:-1]
            if ind_dims > 0:
                output_field_shape += (number_of_elements, )
                field_layout = get_layout_of_array(pdf_arr)
            else:
                field_layout = get_layout_of_array(
                    pdf_arr, index_dimension_ids=[len(pdf_field.shape) - 1])
            output_field = Field.create_fixed_size(output_quantity,
                                                   output_field_shape,
                                                   ind_dims, pdf_arr.dtype,
                                                   field_layout)

        output_mapping[output_quantity] = [
            output_field(i) for i in range(number_of_elements)
        ]
        if len(output_mapping[output_quantity]) == 1:
            output_mapping[output_quantity] = output_mapping[output_quantity][
                0]

    stencil = lb_method.stencil
    previous_step_accessor = get_accessor(streaming_pattern, previous_timestep)
    pdf_symbols = previous_step_accessor.write(pdf_field, stencil)

    eqs = cqc.output_equations_from_pdfs(pdf_symbols,
                                         output_mapping).all_assignments

    if target == Target.CPU:
        import pystencils.cpu as cpu
        kernel = cpu.make_python_function(
            cpu.create_kernel(eqs,
                              ghost_layers=ghost_layers,
                              iteration_slice=iteration_slice))
    elif target == Target.GPU:
        import pystencils.gpucuda as gpu
        kernel = gpu.make_python_function(
            gpu.create_cuda_kernel(eqs,
                                   ghost_layers=ghost_layers,
                                   iteration_slice=iteration_slice))
    else:
        raise ValueError(
            "Unknown target '%s'. Possible targets are `Target.CPU` and `Target.GPU`"
            % (target, ))

    def getter(pdfs, **kwargs):
        if pdf_arr is not None:
            assert pdfs.shape == pdf_arr.shape and pdfs.strides == pdf_arr.strides, \
                "Pdf array not matching blueprint which was used to compile" + str(pdfs.shape) + str(pdf_arr.shape)
        if not set(output_quantities).issubset(kwargs.keys()):
            raise ValueError(
                "You have to specify the output field for each of the following quantities: %s"
                % (str(output_quantities), ))
        kernel(pdfs=pdfs, **kwargs)

    return getter

Example #13

0

Show file

File: macroscopic_value_kernels.py Project: mabau/lbmpy

def compile_macroscopic_values_setter(lb_method,
                                      quantities_to_set,
                                      pdf_arr=None,
                                      ghost_layers=1,
                                      iteration_slice=None,
                                      field_layout='numpy',
                                      target=Target.CPU,
                                      streaming_pattern='pull',
                                      previous_timestep=Timestep.BOTH):
    """
    Creates a function that sets a pdf field to specified macroscopic quantities
    The returned function can be called with the pdf field to set as single argument

    Args:
        lb_method: instance of :class:`lbmpy.methods.AbstractLbMethod`
        quantities_to_set: map from conserved quantity name to fixed value or numpy array
        pdf_arr: optional numpy array for pdf field - used to get optimal loop structure for kernel
        ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
                      that should be excluded from the iteration. If None, the number of ghost layers 
                      is determined automatically and assumed to be equal for all dimensions.        
        iteration_slice: if not None, iteration is done only over this slice of the field
        field_layout: layout of the pdf field if pdf_arr was not given
        target: `Target.CPU` or `Target.GPU`
        previous_step_accessor: The accessor used by the streaming pattern of the previous timestep

    Returns:
        function taking pdf array as single argument and which sets the field to the given values
    """
    if pdf_arr is not None:
        pdf_field = Field.create_from_numpy_array('pdfs',
                                                  pdf_arr,
                                                  index_dimensions=1)
    else:
        pdf_field = Field.create_generic('pdfs',
                                         lb_method.dim,
                                         index_dimensions=1,
                                         layout=field_layout)

    fixed_kernel_parameters = {}
    cqc = lb_method.conserved_quantity_computation

    value_map = {}
    at_least_one_field_input = False
    for quantity_name, value in quantities_to_set.items():
        if hasattr(value, 'shape'):
            fixed_kernel_parameters[quantity_name] = value
            at_least_one_field_input = True
            num_components = cqc.conserved_quantities[quantity_name]
            field = Field.create_from_numpy_array(
                quantity_name,
                value,
                index_dimensions=0 if num_components <= 1 else 1)
            if num_components == 1:
                value = field(0)
            else:
                value = [field(i) for i in range(num_components)]

        value_map[quantity_name] = value

    cq_equations = cqc.equilibrium_input_equations_from_init_values(
        **value_map, force_substitution=False)

    eq = lb_method.get_equilibrium(conserved_quantity_equations=cq_equations)
    if at_least_one_field_input:
        simplification = create_simplification_strategy(lb_method)
        eq = simplification(eq)
    else:
        eq = eq.new_without_subexpressions()

    previous_step_accessor = get_accessor(streaming_pattern, previous_timestep)
    write_accesses = previous_step_accessor.write(pdf_field, lb_method.stencil)

    substitutions = {
        sym: write_accesses[i]
        for i, sym in enumerate(lb_method.post_collision_pdf_symbols)
    }
    eq = eq.new_with_substitutions(substitutions).all_assignments

    if target == Target.CPU:
        import pystencils.cpu as cpu
        kernel = cpu.make_python_function(cpu.create_kernel(eq))
        kernel = functools.partial(kernel, **fixed_kernel_parameters)
    elif target == Target.GPU:
        import pystencils.gpucuda as gpu
        kernel = gpu.make_python_function(gpu.create_cuda_kernel(eq))
        kernel = functools.partial(kernel, **fixed_kernel_parameters)
    else:
        raise ValueError(
            "Unknown target '%s'. Possible targets are `Target.CPU` and `Target.GPU`"
            % (target, ))

    def setter(pdfs, **kwargs):
        if pdf_arr is not None:
            assert pdfs.shape == pdf_arr.shape and pdfs.strides == pdf_arr.strides, \
                "Pdf array not matching blueprint which was used to compile" + str(pdfs.shape) + str(pdf_arr.shape)
        kernel(pdfs=pdfs, **kwargs)

    return setter