def test_vectorization_fixed_size():
    """Vectorize a 5-point stencil kernel on fixed-size arrays.

    Covers an inner size that is a multiple of four as well as two
    different non-multiples, to exercise the vectorizer's remainder
    handling on fixed-shape fields.
    """
    # Inner sizes (excluding the one-cell ghost layer on each side):
    # multiple of four, then two different remainders.
    inner_shapes = [(20, 24), (21, 25), (23, 17)]
    for rows, cols in inner_shapes:
        arr = np.ones((rows + 2, cols + 2)) * 5.0
        f, g = ps.fields(f=arr, g=arr)
        update_rule = [ps.Assignment(g[0, 0],
                                     f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
        ast = ps.create_kernel(update_rule)
        vectorize(ast)
        func = ast.compile()
        dst = np.zeros_like(arr)
        func(g=dst, f=arr)
        # Every interior cell sums five neighbours holding 5.0 plus the constant.
        np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
def test_piecewise3(instruction_set=instruction_set):
    """A conditional assignment written as an inline if/else must vectorize and compile."""
    domain = np.zeros((22, 22))

    @ps.kernel
    def test_kernel(s):
        f, g = ps.fields(f=domain, g=domain)
        s.b @= f[0, 1]
        g[0, 0] @= 1.0 / (s.b + s.k) if f[0, 0] > 0.0 else 1.0

    ast = ps.create_kernel(test_kernel)
    vectorize(ast, instruction_set=instruction_set)
    # Successful compilation is the assertion here.
    ast.compile()
def test_logical_operators():
    """An sp.And condition feeding a Piecewise must vectorize and compile.

    NOTE(review): this definition shares its name with the later
    ``test_logical_operators(instruction_set=...)`` in this file, which
    shadows it at module level — this version is never executed by pytest.
    """
    data = np.zeros((22, 22))

    @ps.kernel
    def test_kernel(s):
        f, g = ps.fields(f=data, g=data)
        s.c @= sp.And(f[0, 1] < 0.0, f[1, 0] < 0.0)
        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])

    ast = ps.create_kernel(test_kernel)
    vectorize(ast)
    ast.compile()
def test_vectorization_variable_size():
    """Vectorization must also work when the field shape is only known at call time."""
    f, g = ps.fields("f, g : double[2D]")
    update_rule = [ps.Assignment(g[0, 0],
                                 f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
    ast = ps.create_kernel(update_rule)
    # Fix the innermost stride to one so the vectorizer can be applied.
    replace_inner_stride_with_one(ast)
    vectorize(ast)
    kernel = ast.compile()

    src = np.ones((23 + 2, 17 + 2)) * 5.0
    dst = np.zeros_like(src)
    kernel(g=dst, f=src)
    np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
def test_piecewise1():
    """A Piecewise whose condition is a field comparison must vectorize correctly."""
    a, b, c, d, e = sp.symbols("a b c d e")
    src = np.ones((2 ** 3 + 2, 2 ** 4 + 2)) * 5.0
    f, g = ps.fields(f=src, g=src)

    update_rule = [
        ps.Assignment(a, f[1, 0]),
        ps.Assignment(b, a),
        ps.Assignment(c, f[0, 0] > 0.0),
        ps.Assignment(g[0, 0], sp.Piecewise((b + 3 + f[0, 1], c), (0.0, True))),
    ]
    ast = ps.create_kernel(update_rule)
    vectorize(ast)
    kernel = ast.compile()

    dst = np.zeros_like(src)
    kernel(g=dst, f=src)
    # The condition holds everywhere (field is 5.0 > 0), so each interior
    # cell is neighbour(5) + 3 + neighbour(5).
    np.testing.assert_equal(dst[1:-1, 1:-1], 5 + 3 + 5.0)
def test_piecewise2(instruction_set=instruction_set):
    """Vectorized if/else on a subexpression symbol must produce correct values."""
    data = np.zeros((20, 20))

    @ps.kernel
    def test_kernel(s):
        f, g = ps.fields(f=data, g=data)
        s.condition @= f[0, 0] > 1
        s.result @= 0.0 if s.condition else 1.0
        g[0, 0] @= s.result

    ast = ps.create_kernel(test_kernel)
    vectorize(ast, instruction_set=instruction_set)
    kernel = ast.compile()

    kernel(f=data, g=data)
    # Input starts at zero, so the condition is false and every cell becomes 1.0.
    np.testing.assert_equal(data, np.ones_like(data))
def test_vector_type_propagation():
    """Vector types must propagate through chained subexpression assignments."""
    a, b, c, d, e = sp.symbols("a b c d e")
    src = np.ones((2 ** 2 + 2, 2 ** 3 + 2))
    src *= 10.0
    f, g = ps.fields(f=src, g=src)

    update_rule = [
        ps.Assignment(a, f[1, 0]),
        ps.Assignment(b, a),
        ps.Assignment(g[0, 0], b + 3 + f[0, 1]),
    ]
    ast = ps.create_kernel(update_rule)
    vectorize(ast)
    kernel = ast.compile()

    dst = np.zeros_like(src)
    kernel(g=dst, f=src)
    # Two neighbours at 10.0 plus the constant 3.
    np.testing.assert_equal(dst[1:-1, 1:-1], 2 * 10.0 + 3)
def test_logical_operators(instruction_set=instruction_set):
    """And / Or / Eq conditions inside a Piecewise must survive vectorization.

    Each kernel is created, vectorized for the given instruction set and
    compiled; successful compilation is the assertion.
    """
    arr = np.zeros((22, 22))

    def _build_and_compile(kernel_func):
        # Shared create -> vectorize -> compile pipeline for all three kernels.
        ast = ps.create_kernel(kernel_func)
        vectorize(ast, instruction_set=instruction_set)
        ast.compile()

    @ps.kernel
    def kernel_and(s):
        f, g = ps.fields(f=arr, g=arr)
        s.c @= sp.And(f[0, 1] < 0.0, f[1, 0] < 0.0)
        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])

    _build_and_compile(kernel_and)

    @ps.kernel
    def kernel_or(s):
        f, g = ps.fields(f=arr, g=arr)
        s.c @= sp.Or(f[0, 1] < 0.0, f[1, 0] < 0.0)
        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])

    _build_and_compile(kernel_or)

    @ps.kernel
    def kernel_equal(s):
        f, g = ps.fields(f=arr, g=arr)
        s.c @= sp.Eq(f[0, 1], 2.0)
        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])

    _build_and_compile(kernel_equal)
def test_vectorised_fast_approximations(instruction_set=instruction_set):
    """Fast sqrt and fast division approximations must be vectorizable."""
    arr = np.zeros((24, 24))
    f, g = ps.fields(f=arr, g=arr)

    def _compile_vectorized(assignment):
        # Shared create -> vectorize -> compile pipeline; successful
        # compilation is the assertion.
        ast = ps.create_kernel(assignment)
        vectorize(ast, instruction_set=instruction_set)
        ast.compile()

    # Fast sqrt inserted into an expression on the rhs.
    expr = sp.sqrt(f[0, 0] + f[1, 0])
    _compile_vectorized(ps.Assignment(g[0, 0], insert_fast_sqrts(expr)))

    # Fast division.
    expr = f[0, 0] / f[1, 0]
    _compile_vectorized(ps.Assignment(g[0, 0], insert_fast_divisions(expr)))

    # Fast sqrt applied to a whole assignment with a plain symbol lhs.
    assignment = ps.Assignment(sp.Symbol("tmp"), 3 / sp.sqrt(f[0, 0] + f[1, 0]))
    _compile_vectorized(insert_fast_sqrts(assignment))
def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu',
                            gpu_exclusive_conditions=False, **kwargs):
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    Args:
        staggered_field: field where the first index coordinate defines the location of the staggered value
                can have 1 or 2 index coordinates, in case of two index coordinates at every staggered location
                a vector is stored, expressions parameter has to be a sequence of sequences then
                where e.g. ``f[0,0](0)`` is interpreted as value at the left cell boundary, ``f[1,0](0)`` the right
                cell boundary and ``f[0,0](1)`` the southern cell boundary etc.
        expressions: sequence of expressions of length dim, defining how the west, southern, (bottom) cell boundary
                     should be updated.
        subexpressions: optional sequence of Assignments, that define subexpressions used in the main expressions
        target: 'cpu' or 'gpu'
        gpu_exclusive_conditions: if/else construct to have only one code block for each of 2**dim code paths
        kwargs: passed directly to create_kernel, iteration slice and ghost_layers parameters are not allowed

    Returns:
        AST, see `create_kernel`
    """
    # iteration_slice / ghost_layers are computed here, so callers must not supply them.
    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
    assert staggered_field.index_dimensions in (1, 2), 'Staggered field must have one or two index dimensions'
    dim = staggered_field.spatial_dimensions

    # One loop counter per spatial dimension; the conditions guard the last
    # (upper) staggered layer, which only exists in the direction being updated.
    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]
    conditions = [counters[i] < staggered_field.shape[i] - 1 for i in range(dim)]
    assert len(expressions) == dim
    if staggered_field.index_dimensions == 2:
        assert all(len(sublist) == len(expressions[0]) for sublist in expressions), \
            "If staggered field has two index dimensions expressions has to be a sequence of sequences of all the " \
            "same length."

    final_assignments = []
    last_conditional = None

    def add(condition, dimensions, as_else_block=False):
        # Build the assignments for the given staggered directions and wrap them
        # in a Conditional. With as_else_block=True the new conditional is chained
        # into the false-branch of the previously added one (GPU exclusive mode).
        nonlocal last_conditional
        if staggered_field.index_dimensions == 1:
            # Scalar per staggered location: one assignment per direction.
            assignments = [Assignment(staggered_field(d), expressions[d]) for d in dimensions]
            a_coll = AssignmentCollection(assignments, list(subexpressions))
            a_coll = a_coll.new_filtered([staggered_field(d) for d in dimensions])
        elif staggered_field.index_dimensions == 2:
            # Vector per staggered location: one assignment per direction and component.
            assert staggered_field.has_fixed_index_shape
            assignments = [Assignment(staggered_field(d, i), expr)
                           for d in dimensions
                           for i, expr in enumerate(expressions[d])]
            a_coll = AssignmentCollection(assignments, list(subexpressions))
            a_coll = a_coll.new_filtered([staggered_field(d, i)
                                          for i in range(staggered_field.index_shape[1])
                                          for d in dimensions])
        sp_assignments = [SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments]
        if as_else_block and last_conditional:
            new_cond = Conditional(condition, Block(sp_assignments))
            last_conditional.false_block = Block([new_cond])
            last_conditional = new_cond
        else:
            last_conditional = Conditional(condition, Block(sp_assignments))
            final_assignments.append(last_conditional)

    if target == 'cpu' or not gpu_exclusive_conditions:
        # One guarded block per direction; each is conditioned on all other dims.
        for d in range(dim):
            cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
            add(cond, [d])
    elif target == 'gpu':
        # Exclusive mode: enumerate all 2**dim combinations of the per-direction
        # conditions so every thread takes exactly one branch.
        full_conditions = [sp.And(*[conditions[i] for i in range(dim) if d != i]) for d in range(dim)]
        for include in itertools.product(*[[1, 0]] * dim):
            case_conditions = sp.And(*[c if value else sp.Not(c) for c, value in zip(full_conditions, include)])
            dimensions_to_include = [i for i in range(dim) if include[i]]
            if dimensions_to_include:
                add(case_conditions, dimensions_to_include, True)

    # One extra lower ghost layer per dimension holds the boundary staggered values.
    ghost_layers = [(1, 0)] * dim

    # Pop the CPU-only options from kwargs; they are applied manually below
    # because the conditionals have to be removed before vectorization/OpenMP.
    blocking = kwargs.get('cpu_blocking', None)
    if blocking:
        del kwargs['cpu_blocking']
    cpu_vectorize_info = kwargs.get('cpu_vectorize_info', None)
    if cpu_vectorize_info:
        del kwargs['cpu_vectorize_info']
    openmp = kwargs.get('cpu_openmp', None)
    if openmp:
        del kwargs['cpu_openmp']

    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)

    if target == 'cpu':
        # Conditionals are replaced by adapted loop bounds, then the deferred
        # blocking / OpenMP / vectorization options are applied.
        remove_conditionals_in_staggered_kernel(ast)
        move_constants_before_loop(ast)
        omp_collapse = None
        if blocking:
            omp_collapse = loop_blocking(ast, blocking)
        if openmp:
            from pystencils.cpu import add_openmp
            add_openmp(ast, num_threads=openmp, collapse=omp_collapse, assume_single_outer_loop=False)
        if cpu_vectorize_info is True:
            vectorize(ast)
        elif isinstance(cpu_vectorize_info, dict):
            vectorize(ast, **cpu_vectorize_info)
    return ast
def create_kernel(assignments, target='cpu', data_type="double", iteration_slice=None, ghost_layers=None,
                  skip_independence_check=False, cpu_openmp=False, cpu_vectorize_info=None, cpu_blocking=None,
                  gpu_indexing='block', gpu_indexing_params=MappingProxyType({})):
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.

    Args:
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
        target: 'cpu', 'llvm' or 'gpu'
        data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
                  to type
        iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
                         part of the field is iterated over
        ghost_layers: if left to default, the number of necessary ghost layers is determined automatically
                     a single integer specifies the ghost layer count at all borders, can also be a sequence of
                     pairs ``[(x_lower_gl, x_upper_gl), .... ]``
        skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                 periodicity kernel, that access the field outside the iteration bounds. Use with care!
        cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
        cpu_vectorize_info: a dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal'
                            for documentation of these parameters see vectorize function. Example:
                            '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
        cpu_blocking: a tuple of block sizes or None if no blocking should be applied
        gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
        gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
                             e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'

    Returns:
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
        can be compiled with through its 'compile()' member

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> ast = ps.create_kernel(assignment, target='cpu', cpu_openmp=True)
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
    """
    # ---- Normalizing parameters
    # Unwrap an AssignmentCollection into a plain list and carry along any
    # split-group simplification hints for the CPU loop splitter.
    split_groups = ()
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments
    if isinstance(assignments, Assignment):
        assignments = [assignments]

    # ---- Creating ast
    if target == 'cpu':
        # Local import shadows this function's name with the CPU backend's create_kernel.
        from pystencils.cpu import create_kernel
        from pystencils.cpu import add_openmp
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                            skip_independence_check=skip_independence_check)
        # Blocking must run before OpenMP so the parallel pragma can collapse
        # the newly introduced outer block loops.
        omp_collapse = None
        if cpu_blocking:
            omp_collapse = loop_blocking(ast, cpu_blocking)
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse)
        if cpu_vectorize_info:
            if cpu_vectorize_info is True:
                vectorize(ast)
            elif isinstance(cpu_vectorize_info, dict):
                vectorize(ast, **cpu_vectorize_info)
            else:
                raise ValueError("Invalid value for cpu_vectorize_info")
        return ast
    elif target == 'llvm':
        from pystencils.llvm import create_kernel
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers)
        return ast
    elif target == 'gpu':
        from pystencils.gpucuda import create_cuda_kernel
        ast = create_cuda_kernel(assignments, type_info=data_type,
                                 indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
                                 iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                                 skip_independence_check=skip_independence_check)
        return ast
    else:
        raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))
def create_domain_kernel(assignments: List[Assignment], *, config: CreateKernelConfig):
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.

    Args:
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
        config: CreateKernelConfig which includes the needed configuration

    Returns:
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
        can be compiled with through its 'compile()' member

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> kernel_config = ps.CreateKernelConfig(cpu_openmp=True)
        >>> kernel_ast = ps.kernelcreation.create_domain_kernel([assignment], config=kernel_config)
        >>> kernel = kernel_ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
    """
    # --- applying first default simplifications
    # Simplification is best-effort: a failure only warns and the original
    # assignments are used unchanged.
    try:
        if config.default_assignment_simplifications and isinstance(assignments, AssignmentCollection):
            simplification = create_simplification_strategy()
            assignments = simplification(assignments)
    except Exception as e:
        warnings.warn(f"It was not possible to apply the default pystencils optimisations to the "
                      f"AssignmentCollection due to the following problem :{e}")

    # ---- Normalizing parameters
    # Unwrap an AssignmentCollection and keep any split-group hints for the
    # CPU backend's loop splitter.
    split_groups = ()
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments

    # SymPy-level optimisations are likewise best-effort.
    try:
        if config.default_assignment_simplifications:
            assignments = apply_sympy_optimisations(assignments)
    except Exception as e:
        warnings.warn(f"It was not possible to apply the default SymPy optimisations to the "
                      f"Assignments due to the following problem :{e}")

    # ---- Creating ast
    ast = None
    if config.target == Target.CPU:
        if config.backend == Backend.C:
            from pystencils.cpu import add_openmp, create_kernel
            ast = create_kernel(assignments, function_name=config.function_name, type_info=config.data_type,
                                split_groups=split_groups, iteration_slice=config.iteration_slice,
                                ghost_layers=config.ghost_layers,
                                skip_independence_check=config.skip_independence_check)
            for optimization in config.cpu_prepend_optimizations:
                optimization(ast)
            # Blocking must run before OpenMP so the pragma can collapse the
            # new outer block loops.
            omp_collapse = None
            if config.cpu_blocking:
                omp_collapse = loop_blocking(ast, config.cpu_blocking)
            if config.cpu_openmp:
                add_openmp(ast, num_threads=config.cpu_openmp, collapse=omp_collapse,
                           assume_single_outer_loop=config.omp_single_loop)
            if config.cpu_vectorize_info:
                if config.cpu_vectorize_info is True:
                    vectorize(ast)
                elif isinstance(config.cpu_vectorize_info, dict):
                    vectorize(ast, **config.cpu_vectorize_info)
                    if config.cpu_openmp and config.cpu_blocking and 'nontemporal' in config.cpu_vectorize_info and \
                            config.cpu_vectorize_info['nontemporal'] and 'cachelineZero' in ast.instruction_set:
                        # This condition is stricter than it needs to be: if blocks along the fastest axis start
                        # on a cache line boundary, it's okay. But we cannot determine that here.
                        # We don't need to disallow OpenMP collapsing because it is never applied to the inner loop.
                        raise ValueError("Blocking cannot be combined with cacheline-zeroing")
                else:
                    raise ValueError("Invalid value for cpu_vectorize_info")
    elif config.target == Target.GPU:
        if config.backend == Backend.CUDA:
            from pystencils.gpucuda import create_cuda_kernel
            ast = create_cuda_kernel(assignments, function_name=config.function_name, type_info=config.data_type,
                                     indexing_creator=indexing_creator_from_params(config.gpu_indexing,
                                                                                   config.gpu_indexing_params),
                                     iteration_slice=config.iteration_slice, ghost_layers=config.ghost_layers,
                                     skip_independence_check=config.skip_independence_check)

    # ast is still None for any unsupported target/backend combination.
    if not ast:
        raise NotImplementedError(
            f'{config.target} together with {config.backend} is not supported by `create_domain_kernel`')

    if config.use_auto_for_assignments:
        for a in ast.atoms(SympyAssignment):
            a.use_auto = True

    return ast
def test_vectorised_pow(instruction_set=instruction_set):
    """sp.Pow with assorted exponents must vectorize and compile.

    Exponents cover square, square root, reciprocal square root, higher
    integer powers and reciprocals, which may each lower to a different
    vectorized expansion.
    """
    arr = np.zeros((24, 24))
    f, g = ps.fields(f=arr, g=arr)

    for exponent in (2, 0.5, -0.5, 4, -4, -1):
        assignment = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], exponent))
        ast = ps.create_kernel(assignment)
        vectorize(ast, instruction_set=instruction_set)
        # Successful compilation is the assertion.
        ast.compile()