Beispiel #1
0
    def _padding(self, nodes, state):
        """
        Introduce temporary buffers padded to the nearest multiple of the vector
        length, to maximize data alignment. At the bottom of the kernel, the
        values in the padded temporaries will be copied back into the input arrays.

        :param nodes: The tree of nodes to be transformed.
        :param state: Not used by this pass.
        :returns: A 2-tuple ``(nodes, {})``. ``nodes`` is returned untouched
                  when no written object qualifies for padding; otherwise it
                  is a ``List`` made of the initialization loops, the
                  rewritten input nodes and the copy-back loops.
        """
        # Assess feasibility of the transformation
        handle = FindSymbols('symbolics-writes').visit(nodes)
        if not handle:
            return nodes, {}
        shape = max([i.shape for i in handle], key=len)
        if not shape:
            return nodes, {}
        # Only objects whose innermost extent matches that of the
        # highest-rank written object are padded. Objects with an empty shape
        # are skipped explicitly, as indexing ``shape[-1]`` on them would raise
        candidates = [i for i in handle
                      if i.shape and i.shape[-1] == shape[-1]]
        if not candidates:
            return nodes, {}

        # Retrieve the maximum number of items in a SIMD register when processing
        # the expressions in /node/
        exprs = FindNodes(Expression).visit(nodes)
        exprs = [e for e in exprs if e.write in candidates]
        assert len(exprs) > 0
        dtype = exprs[0].dtype
        assert all(e.dtype == dtype for e in exprs)
        try:
            simd_items = get_simd_items(dtype)
        except KeyError:
            # Fallback to the AVX512 register width (maximum expectable
            # padding). Floor division keeps the item count an integer on
            # both Python 2 and Python 3; true division would yield a float
            # and, through /roundm/, a non-integer array extent
            simd_items = simdinfo['avx512f'] // np.dtype(dtype).itemsize

        # Same shape as the original object, except for the innermost extent,
        # which is rounded to a multiple of /simd_items/
        shapes = {
            k: k.shape[:-1] + (roundm(k.shape[-1], simd_items), )
            for k in candidates
        }
        # Map each original indexed object to its padded counterpart ("p<name>")
        mapper = OrderedDict(
            (k.indexed,
             Array(name='p%s' % k.name,
                   shape=shapes[k],
                   dimensions=k.indices,
                   onstack=k._mem_stack).indexed)
            for k in candidates)

        # Substitute original arrays with padded buffers
        processed = SubstituteExpression(mapper).visit(nodes)

        # Build Iteration trees for initialization and copy-back of padded arrays
        # (only for the entries whose function is a SymbolicFunction)
        mapper = OrderedDict([(k, v) for k, v in mapper.items()
                              if k.function.is_SymbolicFunction])
        init = copy_arrays(mapper, reverse=True)
        copyback = copy_arrays(mapper)

        processed = List(body=init + as_tuple(processed) + copyback)

        return processed, {}
Beispiel #2
0
def simple_function_with_paddable_arrays(a_dense, b_dense, exprs, iters):
    """
    Build the Callable ``foo`` enclosing a perfect triple loop nest.

    Layout of the generated routine::

        void foo(a_dense, b_dense)
          for iters[0]
            for iters[1]
              for iters[2]
                exprs[6]

    The Callable is then visited by ``ResolveTimeStepping`` and the
    substitutions it returns are applied through ``SubstituteExpression``.
    """
    arguments = [a_dense.base.function, b_dense.base.function]

    # Assemble the nest starting from the innermost loop
    innermost = iters[2](exprs[6])
    loop_nest = iters[0](iters[1](innermost))

    routine = Callable('foo', loop_nest, 'void', arguments, ())
    routine, replacements = ResolveTimeStepping().visit(routine)
    return SubstituteExpression(subs=replacements).visit(routine)
Beispiel #3
0
def simple_function_fissionable(a, b, exprs, iters):
    """
    Build the Callable ``foo`` whose innermost loop carries two expressions.

    Layout of the generated routine::

        void foo(a, b)
          for iters[0]
            for iters[1]
              for iters[2]
                exprs[0]
                exprs[2]

    The Callable is then visited by ``ResolveTimeStepping`` and the
    substitutions it returns are applied through ``SubstituteExpression``.
    """
    arguments = [a.base.function, b.base.function]

    # Both expressions share the same innermost loop
    innermost = iters[2]([exprs[0], exprs[2]])
    loop_nest = iters[0](iters[1](innermost))

    routine = Callable('foo', loop_nest, 'void', arguments, ())
    routine, replacements = ResolveTimeStepping().visit(routine)
    return SubstituteExpression(subs=replacements).visit(routine)
Beispiel #4
0
def complex_function(a, b, c, d, exprs, iters):
    """
    Build the Callable ``foo`` with three sibling sub-nests in one outer loop.

    Layout of the generated routine::

        void foo(a, b, c, d)
          for iters[0]
            for iters[3]
              exprs[2]
            for iters[1]
              for iters[2]
                exprs[3]
                exprs[4]
            for iters[4]
              exprs[5]

    The Callable is then visited by ``ResolveTimeStepping`` and the
    substitutions it returns are applied through ``SubstituteExpression``.
    """
    arguments = [a.base.function, b.base.function,
                 c.base.function, d.base.function]

    outer = iters[0]
    # The three sub-nests, in the order they appear inside the outer loop
    head = iters[3](exprs[2])
    middle = iters[1](iters[2]([exprs[3], exprs[4]]))
    tail = iters[4](exprs[5])
    loop_nest = outer([head, middle, tail])

    routine = Callable('foo', loop_nest, 'void', arguments, ())
    routine, replacements = ResolveTimeStepping().visit(routine)
    return SubstituteExpression(subs=replacements).visit(routine)
Beispiel #5
0
def iet_build(clusters, dtype):
    """
    Turn an iterable of :class:`Cluster`s into an Iteration/Expression tree (IET).

    The tree is produced by ``iet_make``, passed through ``iet_analyze`` (data
    dependence analysis, which decorates the nodes with properties) and finally
    rewritten so that derived dimensions are replaced by
    :class:`LoweredDimension`s.
    """
    # Clusters -> Iteration/Expression tree, then data dependency analysis
    tree = iet_analyze(iet_make(clusters, dtype))

    # Collect the substitutions for derived dimensions (e.g., t -> t0,
    # t + 1 -> t1). This happens after the analysis on purpose, to ease
    # /iet_analyze/'s life
    lowered = {}
    for iterations in retrieve_iteration_tree(tree):
        for uindex in flatten(node.uindices for node in iterations):
            lowered[uindex.expr] = LoweredDimension(name=uindex.index.name,
                                                    origin=uindex.expr)

    return SubstituteExpression(lowered).visit(tree)
Beispiel #6
0
    def __init__(self, expressions, **kwargs):
        """
        Lower a collection of SymPy equations into a tree of nodes and complete
        the construction through the superclass initializer.

        :param expressions: One or more ``sympy.Eq`` objects; the input is
                            normalized with ``as_tuple``.
        :param kwargs: Optional keyword arguments:

            * ``name``: stored as ``self.name``; defaults to ``"Kernel"``.
            * ``subs``: mapping applied to each expression via ``xreplace``;
              defaults to ``{}``.
            * ``time_axis``: defaults to ``Forward``; when it equals
              ``Backward``, ``reverse`` is set to True on every non-stepping
              time dimension.
            * ``dse``: passed to ``set_dse_mode``; defaults to
              ``configuration['dse']``.
            * ``dle``: passed to ``set_dle_mode``; defaults to
              ``configuration['dle']``.

        :raises InvalidOperator: If any item in ``expressions`` is not a
                                 ``sympy.Eq``.
        """
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        time_axis = kwargs.get("time_axis", Forward)
        dse = kwargs.get("dse", configuration['dse'])
        dle = kwargs.get("dle", configuration['dle'])

        # Header files, etc.
        # (copied into fresh lists so that the class-level defaults are not
        # mutated when this instance extends them below)
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # References to local or external routines
        self.func_table = OrderedDict()

        # Expression lowering
        expressions = [indexify(s) for s in expressions]
        expressions = [s.xreplace(subs) for s in expressions]

        # Analysis
        self.dtype = retrieve_dtype(expressions)
        self.input, self.output, self.dimensions = retrieve_symbols(
            expressions)
        stencils = make_stencils(expressions)
        # Offsets are keyed by the /end_name/ of each dimension
        self.offsets = {
            d.end_name: v
            for d, v in retrieve_offsets(stencils).items()
        }

        # Set the direction of time according to the given TimeAxis
        for time in [d for d in self.dimensions if d.is_Time]:
            if not time.is_Stepping:
                time.reverse = time_axis == Backward

        # Parameters of the Operator (Dimensions necessary for data casts)
        # NOTE(review): /parameters/ and /self.dimensions/ are extended in
        # place further down, so both are presumably lists -- confirm against
        # /retrieve_symbols/
        parameters = self.input + self.dimensions

        # Group expressions based on their Stencil and data dependences
        clusters = clusterize(expressions, stencils)

        # Apply the Devito Symbolic Engine (DSE) for symbolic optimization
        clusters = rewrite(clusters, mode=set_dse_mode(dse))

        # Wrap expressions with Iterations according to dimensions
        nodes = self._schedule_expressions(clusters)

        # Data dependency analysis. Properties are attached directly to nodes
        nodes = analyze_iterations(nodes)

        # Introduce C-level profiling infrastructure
        nodes, self.profiler = self._profile_sections(nodes, parameters)

        # Resolve and substitute dimensions for loop index variables
        # (from here on /subs/ no longer refers to the user-provided mapping)
        nodes, subs = ResolveTimeStepping().visit(nodes)
        nodes = SubstituteExpression(subs=subs).visit(nodes)

        # Translate into backend-specific representation (e.g., GPU, Yask)
        nodes = self._specialize(nodes, parameters)

        # Apply the Devito Loop Engine (DLE) for loop optimization
        dle_state = transform(nodes, *set_dle_mode(dle))

        # Update the Operator state based on the DLE
        self.dle_arguments = dle_state.arguments
        self.dle_flags = dle_state.flags
        self.func_table.update(
            OrderedDict([(i.name, FunMeta(i, True))
                         for i in dle_state.elemental_functions]))
        # The arguments introduced by the DLE become Operator parameters;
        # those that are Dimensions are also tracked in /self.dimensions/
        parameters.extend([i.argument for i in self.dle_arguments])
        self.dimensions.extend([
            i.argument for i in self.dle_arguments
            if isinstance(i.argument, Dimension)
        ])
        self._includes.extend(list(dle_state.includes))

        # Introduce all required C declarations
        # (applied to the nodes produced by the DLE)
        nodes = self._insert_declarations(dle_state.nodes)

        # Finish instantiation
        # NOTE(review): 'int' is presumably the return type of the generated
        # routine -- confirm against the superclass signature
        super(Operator, self).__init__(self.name, nodes, 'int', parameters, ())