Esempio n. 1
0
 def _make_remainder(self, hs, key, callcompute, *args):
     """
     Build the ``remainder<key>`` callable.

     ``callcompute`` is rebuilt once for each entry of ``hs.omapper.owned``,
     using the second item of the entry as ``dynamic_args_mapper``; the
     resulting calls form the body handed to ``make_efunc``. Any extra
     positional argument is ignored.
     """
     assert callcompute.is_Call

     calls = []
     for _, args_mapper in hs.omapper.owned:
         calls.append(callcompute._rebuild(dynamic_args_mapper=args_mapper))

     name = 'remainder%d' % key
     return make_efunc(name, calls)
Esempio n. 2
0
    def _make_poke(self, hs, key, msgs):
        """
        Build the ``pokempi<key>`` callable, which returns an ``int``.

        For each message in ``msgs`` an Iteration is generated that calls
        ``MPI_Test`` on the send and on the receive request of the message.
        ``lflag`` receives the outcome of each test, while ``gflag`` (initially
        1, and the value eventually returned) is updated with ``lflag`` through
        a ``'&'`` AugmentedExpression after each test.
        """
        lflag = Symbol(name='lflag')
        gflag = Symbol(name='gflag')

        def mpi_test(request):
            # All MPI_Test calls share the same shape; only the request changes
            return Call('MPI_Test',
                        [Byref(request), Byref(lflag), Macro('MPI_STATUS_IGNORE')])

        # Flags initialization
        nodes = [Expression(DummyEq(lflag, 0)), Expression(DummyEq(gflag, 1))]

        for msg in msgs:
            dim = Dimension(name='i')
            entry = IndexedPointer(msg, dim)

            testrecv = mpi_test(FieldFromComposite(msg._C_field_rrecv, entry))
            testsend = mpi_test(FieldFromComposite(msg._C_field_rsend, entry))

            # The very same node is used after both tests
            update = AugmentedExpression(DummyEq(gflag, lflag), '&')

            loop = Iteration([testsend, update, testrecv, update], dim, msg.npeers - 1)
            nodes.append(loop)

        nodes.append(Return(gflag))

        return make_efunc('pokempi%d' % key, List(body=nodes), retval='int')
Esempio n. 3
0
 def _make_compute(self, hs, key, msgs, callpoke):
     """
     Build the ``compute<key>`` callable out of ``hs.body``.

     Nothing is built (``None`` is returned) if ``hs.body`` is already a
     Call. Otherwise each ExpressionBundle in ``hs.body`` is replaced by a
     List made of ``callpoke`` followed by the bundle itself.
     """
     if hs.body.is_Call:
         return None

     mapper = {}
     for bundle in FindNodes(ExpressionBundle).visit(hs.body):
         mapper[bundle] = List(body=[callpoke, bundle])
     iet = Transformer(mapper).visit(hs.body)

     return make_efunc('compute%d' % key, iet, hs.arguments)
Esempio n. 4
0
 def _make_compute(self, hs, key, msgs, callpoke):
     """
     Build the ``compute<key>`` callable, parametrized over ``hs.dimensions``.

     Returns ``None`` when ``hs.body`` is a Call. Otherwise ``callpoke`` is
     placed right before every ExpressionBundle found in ``hs.body``.
     """
     if hs.body.is_Call:
         return None

     bundles = FindNodes(ExpressionBundle).visit(hs.body)
     mapper = dict((bundle, List(body=[callpoke, bundle])) for bundle in bundles)
     body = Transformer(mapper).visit(hs.body)

     return make_efunc('compute%s' % key, body, hs.dimensions)
Esempio n. 5
0
    def test_make_efuncs(self, exprs, nfuncs, ntimeiters, nests):
        """Check the callables that ``make_efunc`` builds from an Operator."""
        exprs = list(as_tuple(exprs))

        # NOTE: the local names below may be referenced by the strings passed
        # to `eval`, so they must not be renamed
        grid = Grid(shape=(10, 10))
        t = grid.stepping_dim  # noqa
        x, y = grid.dimensions  # noqa

        u = Function(name='u', grid=grid)  # noqa
        v = TimeFunction(name='v', grid=grid)  # noqa

        # A list comprehension would need explicit locals/globals mappings to eval
        for i, e in enumerate(list(exprs)):
            exprs[i] = eval(e)

        op = Operator(exprs)

        # One callable for each Iteration nest over space dimensions
        efuncs = []
        for n, tree in enumerate(retrieve_iteration_tree(op)):
            root = filter_iterations(tree, key=lambda i: i.dim.is_Space, stop='asap')
            efuncs.append(make_efunc('f%d' % n, root))

        assert len(efuncs) == len(nfuncs) == len(ntimeiters) == len(nests)

        for efunc, nf, nt, nest in zip(efuncs, nfuncs, ntimeiters, nests):
            params = efunc.parameters

            # The bounds of both space dimensions must be among the parameters
            for d in (x, y):
                assert all(b in params for b in (d.symbolic_min, d.symbolic_max))

            # So must all of the functions ...
            functions = FindSymbols().visit(efunc)
            assert len(functions) == nf
            assert all(f in params for f in functions)

            # ... and the time dimensions
            timeiters = []
            for s in FindSymbols('free-symbols').visit(efunc):
                if s.is_Dimension and s.is_Time:
                    timeiters.append(s)
            assert len(timeiters) == nt
            assert all(d in params for d in timeiters)

            # And nothing else
            assert len(params) == 4 + len(functions) + len(timeiters)

            # Exactly one ArrayCast for each Function
            casts = FindNodes(ArrayCast).visit(efunc)
            assert len(casts) == nf

            # Loop nest structure
            trees = retrieve_iteration_tree(efunc)
            assert len(trees) == 1
            assert all(it.dim.name == name for it, name in zip(trees[0], nest))

            assert efunc.make_call()
Esempio n. 6
0
    def _make_remainder(self, hs, key, callcompute, region):
        """
        Build the ``remainder<key>`` callable.

        The callable consists of a single Iteration whose body is
        ``callcompute`` rebuilt so that, for each Dimension in
        ``hs.dimensions``, the (min, max) arguments are fields of the
        ``region`` entry indexed by the Iteration.
        """
        assert callcompute.is_Call

        dim = Dimension(name='i')
        entry = IndexedPointer(region, dim)

        dynamic_args_mapper = {}
        for d in hs.dimensions:
            lower = FieldFromComposite(d.min_name, entry)
            upper = FieldFromComposite(d.max_name, entry)
            dynamic_args_mapper[d] = (lower, upper)

        call = callcompute._rebuild(dynamic_args_mapper=dynamic_args_mapper)

        # By default an Iteration generates `<=`, hence the `-1`
        loop = Iteration(call, dim, region.nregions - 1)

        return make_efunc('remainder%s' % key, loop)
Esempio n. 7
0
    def _make_poke(self, hs, key, msgs):
        """
        Build the ``pokempi<key>`` callable.

        After initializing a local ``flag`` to 0, the callable runs one
        Iteration per message in ``msgs``, calling ``MPI_Test`` first on the
        send request and then on the receive request of the message.
        """
        flag = Symbol(name='flag')

        body = [LocalExpression(DummyEq(flag, 0))]
        for msg in msgs:
            dim = Dimension(name='i')
            entry = IndexedPointer(msg, dim)

            requests = (
                Byref(FieldFromComposite(msg._C_field_rrecv, entry)),
                Byref(FieldFromComposite(msg._C_field_rsend, entry)),
            )

            # The two calls only differ in the request being tested
            testrecv, testsend = [
                Call('MPI_Test', [req, Byref(flag), Macro('MPI_STATUS_IGNORE')])
                for req in requests
            ]

            body.append(Iteration([testsend, testrecv], dim, msg.npeers - 1))

        return make_efunc('pokempi%s' % key, body)
Esempio n. 8
0
    def test_make_efuncs(self, exprs, nfuncs, ntimeiters, nests):
        """Check the callables that ``make_efunc`` builds from an Operator."""
        exprs = list(as_tuple(exprs))

        # NOTE: the local names below may be referenced by the strings passed
        # to `eval`, so they must not be renamed
        grid = Grid(shape=(10, 10))
        t = grid.stepping_dim  # noqa
        x, y = grid.dimensions  # noqa

        u = Function(name='u', grid=grid)  # noqa
        v = TimeFunction(name='v', grid=grid)  # noqa

        # A list comprehension would need explicit locals/globals mappings to eval
        for i, e in enumerate(list(exprs)):
            exprs[i] = eval(e)

        op = Operator(exprs)

        # One callable for each Iteration nest over space dimensions
        efuncs = []
        for n, tree in enumerate(retrieve_iteration_tree(op)):
            space_iterations = filter_iterations(tree, key=lambda i: i.dim.is_Space)
            efuncs.append(make_efunc('f%d' % n, space_iterations[0]))

        assert len(efuncs) == len(nfuncs) == len(ntimeiters) == len(nests)

        for efunc, nf, nt, nest in zip(efuncs, nfuncs, ntimeiters, nests):
            params = efunc.parameters

            # The bounds of both space dimensions must be among the parameters
            for d in (x, y):
                assert all(b in params for b in (d.symbolic_min, d.symbolic_max))

            # So must all of the functions ...
            functions = FindSymbols().visit(efunc)
            assert len(functions) == nf
            assert all(f in params for f in functions)

            # ... and the time dimensions
            timeiters = []
            for s in FindSymbols('free-symbols').visit(efunc):
                if isinstance(s, Dimension) and s.is_Time:
                    timeiters.append(s)
            assert len(timeiters) == nt
            assert all(d in params for d in timeiters)

            # And nothing else
            assert len(params) == 4 + len(functions) + len(timeiters)

            # Loop nest structure
            trees = retrieve_iteration_tree(efunc)
            assert len(trees) == 1
            assert all(it.dim.name == name for it, name in zip(trees[0], nest))

            assert efunc.make_call()
Esempio n. 9
0
    def _make_remainder(self, hs, key, callcompute, region):
        """
        Build the ``remainder<key>`` callable.

        The callable consists of a single Iteration whose body is
        ``callcompute`` rebuilt so that each of ``hs.arguments`` is read from
        the ``region`` entry indexed by the Iteration: a (min, max) pair of
        fields for a Dimension, a single field otherwise.
        """
        assert callcompute.is_Call

        dim = Dimension(name='i')
        region_i = IndexedPointer(region, dim)

        def fields(arg):
            # Dimensions need two fields (min and max), anything else just one
            names = (arg.min_name, arg.max_name) if arg.is_Dimension else (arg.name,)
            return tuple(FieldFromComposite(name, region_i) for name in names)

        dynamic_args_mapper = {arg: fields(arg) for arg in hs.arguments}

        call = callcompute._rebuild(dynamic_args_mapper=dynamic_args_mapper)

        # By default an Iteration generates `<=`, hence the `-1`
        loop = Iteration(call, dim, region.nregions - 1)

        return make_efunc('remainder%d' % key, loop)
Esempio n. 10
0
    def _make_remainder(self, hs, key, callcompute, *args):
        """
        Build the ``remainder<key>`` callable.

        For each Dimension in ``hs.omapper`` three ranges are derived from the
        default bounds of ``callcompute``: (CORE, CENTER), (OWNED, LEFT) and
        (OWNED, RIGHT). ``callcompute`` is then rebuilt for each combination of
        ranges across the Dimensions, except the one made of CORE ranges only.
        Any extra positional argument is ignored.
        """
        assert callcompute.is_Call

        items = []
        mapper = OrderedDict()
        for d, (left, right) in hs.omapper.items():
            defleft, defright = callcompute.dynamic_defaults[d]
            dmapper = OrderedDict([
                ((d, CORE, CENTER), (defleft, defright)),
                ((d, OWNED, LEFT), (defleft - left, defleft)),
                ((d, OWNED, RIGHT), (defright, defright - right)),
            ])
            mapper.update(dmapper)
            items.append(list(dmapper))

        calls = []
        for combo in product(*items):
            regions = [region for _, region, _ in combo]
            if all(region is CORE for region in regions):
                # The all-CORE combination is not part of the remainder
                continue
            dynamic_args_mapper = {k[0]: mapper[k] for k in combo}
            call = callcompute._rebuild(dynamic_args_mapper=dynamic_args_mapper,
                                        incr=False)
            calls.append(call)

        return make_efunc('remainder%s' % key, calls)
Esempio n. 11
0
    def _make_poke(self, hs, key, msgs):
        """
        Build the ``pokempi<key>`` callable.

        After initializing a local ``flag`` to 0, the callable runs one
        Iteration per message in ``msgs``, calling ``MPI_Test`` first on the
        send request and then on the receive request of the message.
        """
        flag = Symbol(name='flag')

        def mpi_test(request):
            # `request` is passed by reference, just like `flag`
            return Call('MPI_Test', [request, Byref(flag), Macro('MPI_STATUS_IGNORE')])

        nodes = [LocalExpression(DummyEq(flag, 0))]
        for msg in msgs:
            dim = Dimension(name='i')
            entry = IndexedPointer(msg, dim)

            rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, entry))
            rsend = Byref(FieldFromComposite(msg._C_field_rsend, entry))
            testrecv = mpi_test(rrecv)
            testsend = mpi_test(rsend)

            nodes.append(Iteration([testsend, testrecv], dim, msg.npeers - 1))

        return make_efunc('pokempi%d' % key, nodes)
Esempio n. 12
0
    def _make_remainder(self, compute, hs, key):
        """
        Build the ``remainder<key>`` callable.

        For each Dimension in ``hs.omapper`` three ranges are derived from the
        default bounds of ``compute``: (CORE, CENTER), (OWNED, LEFT) and
        (OWNED, RIGHT). ``compute`` is then rebuilt for each combination of
        ranges across the Dimensions in which at least one range is not CORE.
        """
        assert compute.is_Call

        keys = []
        bounds = OrderedDict()
        for d, (left, right) in hs.omapper.items():
            defleft, defright = compute.dynamic_defaults[d]
            entries = [
                ((d, CORE, CENTER), (defleft, defright)),
                ((d, OWNED, LEFT), (defleft - left, defleft)),
                ((d, OWNED, RIGHT), (defright, defright - right)),
            ]
            bounds.update(entries)
            keys.append([k for k, _ in entries])

        body = []
        for combo in product(*keys):
            if any(region is not CORE for _, region, _ in combo):
                dynamic_args_mapper = {d: bounds[(d, r, s)] for d, r, s in combo}
                call = compute._rebuild(dynamic_args_mapper=dynamic_args_mapper,
                                        incr=False)
                body.append(call)

        return make_efunc('remainder%s' % key, body)
Esempio n. 13
0
 def _make_compute(self, hs, key, *args):
     """
     Build the ``compute<key>`` callable out of ``hs.body``, parametrized
     over ``hs.arguments``; return ``None`` if ``hs.body`` is already a Call.
     Any extra positional argument is ignored.
     """
     if hs.body.is_Call:
         return None
     return make_efunc('compute%d' % key, hs.body, hs.arguments)
Esempio n. 14
0
 def _make_compute(self, hs, key, *args):
     """
     Build the ``compute<key>`` callable out of ``hs.body``, parametrized
     over ``hs.dimensions``; return ``None`` if ``hs.body`` is already a Call.
     Any extra positional argument is ignored.
     """
     body = hs.body
     if body.is_Call:
         return None
     name = 'compute%s' % key
     return make_efunc(name, body, hs.dimensions)
Esempio n. 15
0
def relax_incr_dimensions(iet, **kwargs):
    """
    Recast Iterations over IncrDimensions as ElementalFunctions; insert
    ElementalCalls to iterate over the "main" and "remainder" regions induced
    by the IncrDimensions.

    The name of each ElementalFunction is drawn from ``kwargs['sregistry']``.
    Returns the transformed ``iet`` and a dict whose ``'efuncs'`` entry lists
    the ElementalFunctions created.
    """
    sregistry = kwargs['sregistry']

    efuncs = []
    mapper = {}
    for tree in retrieve_iteration_tree(iet):
        iterations = [it for it in tree if it.dim.is_Incr]

        # Nothing to do, or nest already processed through another tree
        if not iterations or iterations[0] in mapper:
            continue
        root = iterations[0]

        outer, inner = split(iterations, lambda it: not it.dim.parent.is_Incr)

        # For each outer Iteration, the ranges of the two regions: one spanned
        # by whole steps, the other covering what is left
        ranges = []
        for it in outer:
            step = it.dim.step
            maxb = it.symbolic_max - (it.symbolic_size % step)
            main = (it.symbolic_min, maxb, step)
            remainder = (maxb + 1, it.symbolic_max, it.symbolic_max - maxb)
            ranges.append((main, remainder))

        # Drop any offsets
        # E.g., `x = x_m + 2 to x_M - 2` --> `x = x_m to x_M`
        relaxed = []
        for it in outer:
            droot = it.dim.root
            limits = (droot.symbolic_min, droot.symbolic_max, it.step)
            relaxed.append(it._rebuild(limits=limits))
        outer = relaxed

        # The inner Iterations whose step is not a plain integer
        symbolic_inner = [it for it in inner if not is_integer(it.step)]

        # The ElementalFunction
        name = sregistry.make_name(prefix="bf")
        body = compose_nodes(outer)
        dynamic_parameters = flatten((it.symbolic_bounds, it.step) for it in outer)
        dynamic_parameters.extend([it.step for it in symbolic_inner])
        efunc = make_efunc(name, body, dynamic_parameters)

        efuncs.append(efunc)

        # The ElementalCalls, one for each combination of regions
        calls = []
        for combo in product(*ranges):
            dynamic_args_mapper = {}
            for it, (lower, upper, size) in zip(outer, combo):
                dynamic_args_mapper[it.symbolic_min] = lower
                dynamic_args_mapper[it.symbolic_max] = upper
                dynamic_args_mapper[it.step] = size
                for j in symbolic_inner:
                    if j.dim.root is it.dim.root:
                        value = j.step if size is it.step else size
                        dynamic_args_mapper[j.step] = (value,)
            calls.append(efunc.make_call(dynamic_args_mapper))

        mapper[root] = List(body=calls)

    iet = Transformer(mapper).visit(iet)

    return iet, {'efuncs': efuncs}
Esempio n. 16
0
    def make_blocking(self, iet):
        """
        Apply loop blocking to PARALLEL Iteration trees.

        Each blockable nest is rewritten as a hierarchy of ``self.nlevels``
        levels of Iterations over BlockDimensions, wrapping the Iterations
        within the smallest block. The blocked nest is then moved into a
        separate callable, built through ``make_efunc``, and replaced by calls
        to it.

        Reads ``self.blockinner``, ``self.blockalways``, ``self.nlevels`` and
        ``self.nblocked``; the latter is incremented once per blocked nest.

        Returns the transformed ``iet`` and a dict with the BlockDimensions
        introduced (``'dimensions'``), the callables created (``'efuncs'``)
        and the steps of the BlockDimensions (``'args'``).
        """
        # Make sure loop blocking will span as many Iterations as possible
        iet = fold_blockable_tree(iet, self.blockinner)

        mapper = {}
        efuncs = []
        block_dims = []
        for tree in retrieve_iteration_tree(iet):
            # Is the Iteration tree blockable ?
            iterations = filter_iterations(tree, lambda i: i.is_Parallel and i.is_Affine)
            if not self.blockinner:
                # The last candidate Iteration is left unblocked
                iterations = iterations[:-1]
            if len(iterations) <= 1:
                continue
            root = iterations[0]
            if not self.blockalways:
                # Heuristically bypass loop blocking if we think `tree`
                # won't be computationally expensive. This will help with code
                # size/readability, JIT time, and auto-tuning time
                if not (tree.root.is_Sequential or iet.is_Callable):
                    # E.g., not inside a time-stepping Iteration
                    continue
                if any(i.dim.is_Sub and i.dim.local for i in tree):
                    # At least an outer Iteration is over a local SubDimension,
                    # which suggests the computational cost of this Iteration
                    # nest will be negligible w.r.t. the "core" Iteration nest
                    # (making use of non-local (Sub)Dimensions only)
                    continue
            if not IsPerfectIteration().visit(root):
                # Don't know how to block non-perfect nests
                continue

            # Apply hierarchical loop blocking to `tree`
            level_0 = []  # Outermost level of blocking
            level_i = [[] for i in range(1, self.nlevels)]  # Inner levels of blocking
            intra = []  # Within the smallest block
            for i in iterations:
                # The trailing '%d' is left in place to be filled with the level
                template = "%s%d_blk%s" % (i.dim.name, self.nblocked, '%d')
                properties = (PARALLEL,) + ((AFFINE,) if i.is_Affine else ())

                # Build Iteration across `level_0` blocks
                d = BlockDimension(i.dim, name=template % 0)
                level_0.append(Iteration([], d, d.symbolic_max, properties=properties))

                # Build Iteration across all `level_n` blocks, for `n` from 1 to
                # `self.nlevels - 1`. Each level iterates within one block of
                # the level above it, which `d` tracks
                for n, li in enumerate(level_i, 1):
                    di = BlockDimension(d, name=template % n)
                    li.append(Iteration([], di, limits=(d, d+d.step-1, di.step),
                                        properties=properties))
                    d = di

                # Build Iteration within the smallest block
                intra.append(i._rebuild([], limits=(d, d+d.step-1, 1), offsets=(0, 0)))
            level_i = flatten(level_i)

            # Track all constructed BlockDimensions
            block_dims.extend(i.dim for i in level_0 + level_i)

            # Construct the blocked tree
            blocked = compose_nodes(level_0 + level_i + intra + [iterations[-1].nodes])
            blocked = unfold_blocked_tree(blocked)

            # Promote to a separate Callable
            dynamic_parameters = flatten((l0.dim, l0.step) for l0 in level_0)
            dynamic_parameters.extend([li.step for li in level_i])
            efunc = make_efunc("bf%d" % self.nblocked, blocked, dynamic_parameters)
            efuncs.append(efunc)

            # Compute the iteration ranges. There are two per blocked Iteration:
            # the first one ends at `maxb`, that is the max point minus the
            # size modulo the block step; the second one covers what is left
            ranges = []
            for i, l0 in zip(iterations, level_0):
                maxb = i.symbolic_max - (i.symbolic_size % l0.step)
                ranges.append(((i.symbolic_min, maxb, l0.step),
                               (maxb + 1, i.symbolic_max, i.symbolic_max - maxb)))

            # Build Calls to the `efunc`, one per combination of ranges
            body = []
            for p in product(*ranges):
                dynamic_args_mapper = {}
                for l0, (m, M, b) in zip(level_0, p):
                    dynamic_args_mapper[l0.dim] = (m, M)
                    dynamic_args_mapper[l0.step] = (b,)
                    for li in level_i:
                        if li.dim.root is l0.dim.root:
                            # Keep the inner step only if the `level_0` step
                            # is the block step itself (identity check);
                            # otherwise use `b` for the inner level as well
                            value = li.step if b is l0.step else b
                            dynamic_args_mapper[li.step] = (value,)
                call = efunc.make_call(dynamic_args_mapper)
                body.append(List(body=call))

            mapper[root] = List(body=body)

            # Next blockable nest, use different (unique) variable/function names
            self.nblocked += 1

        iet = Transformer(mapper).visit(iet)

        # Force-unfold if some folded Iterations haven't been blocked in the end
        iet = unfold_blocked_tree(iet)

        return iet, {'dimensions': block_dims,
                     'efuncs': efuncs,
                     'args': [i.step for i in block_dims]}
Esempio n. 17
0
    def _loop_blocking(self, iet):
        """
        Apply loop blocking to PARALLEL Iteration trees.

        Each blockable nest is moved into a separate callable, built through
        ``make_efunc``, and replaced by a sequence of calls to it, one for each
        combination of block ranges. The behaviour is tuned by the
        ``'blockinner'`` and ``'blockalways'`` entries of ``self.params``.

        Returns the transformed ``iet`` and a dict with the BlockDimensions
        introduced (``'dimensions'``), the callables created (``'efuncs'``)
        and the steps of the BlockDimensions (``'args'``).
        """
        blockinner = bool(self.params.get('blockinner'))
        blockalways = bool(self.params.get('blockalways'))

        # Fold first, so that blocking spans as many Iterations as possible
        iet = fold_blockable_tree(iet, blockinner)

        mapper = {}
        efuncs = []
        block_dims = []
        for tree in retrieve_iteration_tree(iet):
            # The candidates are the PARALLEL Iterations; the last one is
            # dropped unless `blockinner` is set
            iterations = filter_iterations(tree, lambda i: i.is_Parallel)
            if not blockinner:
                iterations = iterations[:-1]
            if len(iterations) <= 1:
                continue
            root = iterations[0]

            if not blockalways:
                # Heuristics: skip the nests that are unlikely to be expensive,
                # to the benefit of code size/readability, JIT time, and
                # auto-tuning time
                if not tree.root.is_Sequential and not iet.is_Callable:
                    # E.g., not inside a time-stepping Iteration
                    continue
                if any(i.dim.is_Sub and i.dim.local for i in tree):
                    # An Iteration over a local SubDimension suggests that the
                    # cost of this nest is negligible w.r.t. the "core" nest
                    continue

            if not IsPerfectIteration().visit(root):
                # Non-perfect nests cannot be blocked
                continue

            # `mapper` only grows once the nest has been blocked, so this
            # index is unique to the nest
            nblocked = len(mapper)

            # Iterations over the blocks (`interb`) and within a block (`intrab`)
            interb = []
            intrab = []
            for i in iterations:
                d = BlockDimension(i.dim, name="%s%d_blk" % (i.dim.name, nblocked))
                block_dims.append(d)
                properties = (PARALLEL, AFFINE) if i.is_Affine else (PARALLEL,)
                interb.append(Iteration([], d, d.symbolic_max, properties=properties))
                intrab.append(i._rebuild([], limits=(d, d+d.step-1, 1), offsets=(0, 0)))

            # The blocked tree
            blocked = unfold_blocked_tree(
                compose_nodes(interb + intrab + [iterations[-1].nodes])
            )

            # The blocked tree becomes a separate callable
            dynamic_parameters = flatten((bi.dim, bi.dim.symbolic_size) for bi in interb)
            efunc = make_efunc("bf%d" % nblocked, blocked, dynamic_parameters)
            efuncs.append(efunc)

            # Two ranges per blocked Iteration: one spanned by whole blocks,
            # the other covering what is left
            ranges = []
            for i, bi in zip(iterations, interb):
                step = bi.dim.step
                maxb = i.symbolic_max - (i.symbolic_size % step)
                ranges.append(((i.symbolic_min, maxb, step),
                               (maxb + 1, i.symbolic_max, i.symbolic_max - maxb)))

            # One call to the `efunc` for each combination of ranges
            calls = []
            for bounds in product(*ranges):
                dynamic_args_mapper = {}
                for bi, (lower, upper, size) in zip(interb, bounds):
                    dynamic_args_mapper[bi.dim] = (lower, upper)
                    dynamic_args_mapper[bi.dim.step] = (size,)
                calls.append(List(body=efunc.make_call(dynamic_args_mapper)))

            mapper[root] = List(body=calls)

        iet = Transformer(mapper).visit(iet)

        args = [d.step for d in block_dims]
        return iet, {'dimensions': block_dims, 'efuncs': efuncs, 'args': args}
Esempio n. 18
0
    def _loop_blocking(self, iet):
        """
        Apply loop blocking to PARALLEL Iteration trees.

        Each blockable nest is moved into a separate callable, built through
        ``make_efunc``, and replaced by a sequence of calls to it, one for each
        combination of block ranges. The behaviour is tuned by the
        ``'blockinner'`` and ``'blockalways'`` entries of ``self.params``.

        Returns the transformed ``iet`` and a dict with the BlockDimensions
        introduced (``'dimensions'``), the callables created (``'efuncs'``)
        and the steps of the BlockDimensions (``'args'``).
        """
        blockinner = bool(self.params.get('blockinner'))
        blockalways = bool(self.params.get('blockalways'))

        # Fold first, so that blocking spans as many Iterations as possible
        iet = fold_blockable_tree(iet, blockinner)

        mapper = {}
        efuncs = []
        block_dims = []
        for tree in retrieve_iteration_tree(iet):
            # The candidates are the PARALLEL Iterations; the last one is
            # dropped unless `blockinner` is set
            iterations = filter_iterations(tree, lambda i: i.is_Parallel)
            if not blockinner:
                iterations = iterations[:-1]
            if len(iterations) <= 1:
                continue
            root = iterations[0]

            # Heuristic: unless forced to, do not pollute the generated code
            # with blocked nests (longer JIT compilation, lower readability)
            # if the tree isn't embedded in a sequential loop (e.g., a
            # timestepping loop)
            embedded = tree.root.is_Sequential or iet.is_Callable
            if not (embedded or blockalways):
                continue

            # `mapper` only grows once the nest has been blocked, so this
            # index is unique to the nest
            nblocked = len(mapper)

            # Iterations over the blocks (`interb`) and within a block (`intrab`)
            interb = []
            intrab = []
            for i in iterations:
                name = "%s%d_blk" % (i.dim.name, nblocked)
                d = BlockDimension(i.dim, name=name)
                block_dims.append(d)
                interb.append(Iteration([], d, d.symbolic_max, properties=PARALLEL))
                limits = (d, d + d.step - 1, 1)
                intrab.append(i._rebuild([], limits=limits, offsets=(0, 0)))

            # The blocked tree
            blocked = compose_nodes(interb + intrab + [iterations[-1].nodes])
            blocked = unfold_blocked_tree(blocked)

            # The blocked tree becomes a separate callable
            dynamic_parameters = flatten((bi.dim, bi.dim.symbolic_size) for bi in interb)
            efunc = make_efunc("bf%d" % nblocked, blocked, dynamic_parameters)
            efuncs.append(efunc)

            # Two ranges per blocked Iteration: one spanned by whole blocks,
            # the other covering what is left
            ranges = []
            for i, bi in zip(iterations, interb):
                step = bi.dim.step
                maxb = i.symbolic_max - (i.symbolic_size % step)
                whole = (i.symbolic_min, maxb, step)
                leftover = (maxb + 1, i.symbolic_max, i.symbolic_max - maxb)
                ranges.append((whole, leftover))

            # One call to the `efunc` for each combination of ranges
            calls = []
            for combo in product(*ranges):
                dynamic_args_mapper = {}
                for bi, (lower, upper, size) in zip(interb, combo):
                    dynamic_args_mapper[bi.dim] = (lower, upper)
                    dynamic_args_mapper[bi.dim.step] = (size,)
                call = efunc.make_call(dynamic_args_mapper)
                calls.append(List(body=call))

            mapper[root] = List(body=calls)

        iet = Transformer(mapper).visit(iet)

        summary = {
            'dimensions': block_dims,
            'efuncs': efuncs,
            'args': [d.step for d in block_dims]
        }
        return iet, summary
Esempio n. 19
0
 def _make_compute(self, hs, key):
     """
     Build the ``compute<key>`` callable out of ``hs.body``, parametrized
     over ``hs.dimensions``; return ``None`` if ``hs.body`` is already a Call.
     """
     if not hs.body.is_Call:
         return make_efunc('compute%s' % key, hs.body, hs.dimensions)
     return None
Esempio n. 20
0
    def _loop_blocking(self, iet):
        """
        Apply loop blocking to PARALLEL Iteration trees.

        For each blockable nest two callables are created through
        ``make_efunc``: ``bf<n>``, wrapping the blocked nest, and ``f<n>``,
        wrapping the calls to ``bf<n>`` (one per combination of block ranges).
        The nest is then replaced by a call to ``f<n>``. The behaviour is tuned
        by the ``'blockinner'`` and ``'blockalways'`` entries of
        ``self.params``.

        Returns the transformed ``iet`` and a dict with the BlockDimensions
        introduced (``'dimensions'``) and an OrderedDict of the callables
        created (``'efuncs'``).
        """
        blockinner = bool(self.params.get('blockinner'))
        blockalways = bool(self.params.get('blockalways'))
        # Header attached to each call to the blocked callable
        # NOTE(review): presumably a compiler-specific "noinline" decoration,
        # falling back to a plain comment -- confirm in `_compiler_decoration`
        noinline = self._compiler_decoration('noinline', cgen.Comment('noinline?'))

        # Make sure loop blocking will span as many Iterations as possible
        iet = fold_blockable_tree(iet, blockinner)

        mapper = {}
        efuncs = OrderedDict()
        block_dims = []
        for tree in retrieve_iteration_tree(iet):
            # Is the Iteration tree blockable ?
            candidates = [i for i in tree if i.is_Parallel]
            if blockinner:
                iterations = candidates
            else:
                # Vectorizable Iterations are left unblocked
                iterations = [i for i in candidates if not i.is_Vectorizable]
            if len(iterations) <= 1:
                continue
            root = iterations[0]
            if not IsPerfectIteration().visit(root):
                # Illegal/unsupported
                continue
            if not tree.root.is_Sequential and not blockalways:
                # Heuristic: avoid polluting the generated code with blocked
                # nests (thus increasing JIT compilation time and affecting
                # readability) if the blockable tree isn't embedded in a
                # sequential loop (e.g., a timestepping loop)
                continue

            # Apply loop blocking to `tree`
            interb = []
            intrab = []
            for i in iterations:
                # `len(mapper)` is unchanged until the nest is recorded at the
                # end of the loop body, so it acts as a per-nest unique index
                d = BlockDimension(i.dim, name="%s%d_block" % (i.dim.name, len(mapper)))
                # Build Iteration over blocks
                interb.append(Iteration([], d, d.symbolic_max, offsets=i.offsets,
                                        properties=PARALLEL))
                # Build Iteration within a block
                intrab.append(i._rebuild([], limits=(d, d+d.step-1, 1), offsets=(0, 0)))
                # Record that a new BlockDimension has been introduced
                block_dims.append(d)

            # Construct the blocked tree
            blocked = compose_nodes(interb + intrab + [iterations[-1].nodes])
            blocked = unfold_blocked_tree(blocked)

            # Promote to a separate Callable
            dynamic_parameters = flatten((bi.dim, bi.dim.symbolic_size) for bi in interb)
            efunc0 = make_efunc("bf%d" % len(mapper), blocked, dynamic_parameters)

            # Compute the iteration ranges. There are two per blocked Iteration:
            # the first one ends at `maxb`, that is the max point minus the
            # size modulo the block step; the second one covers what is left
            ranges = []
            for i, bi in zip(iterations, interb):
                maxb = i.symbolic_max - (i.symbolic_size % bi.dim.step)
                ranges.append(((i.symbolic_min, maxb, bi.dim.step),
                               (maxb + 1, i.symbolic_max, i.symbolic_max - maxb)))

            # Build Calls to the `efunc`, one per combination of ranges
            body = []
            for p in product(*ranges):
                dynamic_args_mapper = {}
                for bi, (m, M, b) in zip(interb, p):
                    dynamic_args_mapper[bi.dim] = (m, M)
                    dynamic_args_mapper[bi.dim.step] = (b,)
                call = efunc0.make_call(dynamic_args_mapper)
                body.append(List(header=noinline, body=call))

            # Build indirect Call to the `efunc0` Calls
            dynamic_parameters = [i.dim.root for i in candidates]
            dynamic_parameters.extend([bi.dim.step for bi in interb])
            efunc1 = make_efunc("f%d" % len(mapper), body, dynamic_parameters)

            # Track everything to ultimately transform the input `iet`
            mapper[root] = efunc1.make_call()
            # NOTE(review): the values presumably name the callables from which
            # each efunc is called (`efunc0` is only called by `efunc1`) --
            # confirm against the consumer of 'efuncs'
            efuncs[efunc1] = None
            efuncs[efunc0] = [efunc1.name]

        iet = Transformer(mapper).visit(iet)

        return iet, {'dimensions': block_dims, 'efuncs': efuncs}
Esempio n. 21
0
    def _loop_blocking(self, iet):
        """
        Apply loop blocking to PARALLEL Iteration trees.

        Each blockable nest is moved into a separate callable, built through
        ``make_efunc``, and replaced by a sequence of calls to it, one for each
        combination of block ranges. The behaviour is tuned by the
        ``'blockinner'`` and ``'blockalways'`` entries of ``self.params``.

        Returns the transformed ``iet`` and a dict with the BlockDimensions
        introduced (``'dimensions'``), the callables created (``'efuncs'``)
        and the steps of the BlockDimensions (``'args'``).
        """
        blockinner = bool(self.params.get('blockinner'))
        blockalways = bool(self.params.get('blockalways'))

        # Fold first, so that blocking spans as many Iterations as possible
        iet = fold_blockable_tree(iet, blockinner)

        mapper = {}
        efuncs = []
        block_dims = []
        for tree in retrieve_iteration_tree(iet):
            # The candidates are the PARALLEL Iterations; the last one is
            # dropped unless `blockinner` is set
            iterations = filter_iterations(tree, lambda i: i.is_Parallel)
            if not blockinner:
                iterations = iterations[:-1]
            if len(iterations) <= 1:
                continue
            root = iterations[0]

            if not tree.root.is_Sequential and not iet.is_Callable and not blockalways:
                # Heuristic: unless forced to, do not pollute the generated
                # code with blocked nests (longer JIT compilation, lower
                # readability) if the tree isn't embedded in a sequential
                # loop (e.g., a timestepping loop)
                continue

            # `mapper` only grows once the nest has been blocked, so this
            # index is unique to the nest
            nblocked = len(mapper)

            # Iterations over the blocks (`interb`) and within a block (`intrab`)
            interb, intrab = [], []
            for i in iterations:
                d = BlockDimension(i.dim, name="%s%d_blk" % (i.dim.name, nblocked))
                block_dims.append(d)

                properties = (PARALLEL, AFFINE) if i.is_Affine else (PARALLEL,)
                over_blocks = Iteration([], d, d.symbolic_max, properties=properties)
                interb.append(over_blocks)

                in_block = i._rebuild([], limits=(d, d+d.step-1, 1), offsets=(0, 0))
                intrab.append(in_block)

            # The blocked tree
            nodes = interb + intrab + [iterations[-1].nodes]
            blocked = unfold_blocked_tree(compose_nodes(nodes))

            # The blocked tree becomes a separate callable
            dynamic_parameters = flatten((bi.dim, bi.dim.symbolic_size) for bi in interb)
            efunc = make_efunc("bf%d" % nblocked, blocked, dynamic_parameters)
            efuncs.append(efunc)

            # Two ranges per blocked Iteration: one spanned by whole blocks,
            # the other covering what is left
            ranges = []
            for i, bi in zip(iterations, interb):
                step = bi.dim.step
                maxb = i.symbolic_max - (i.symbolic_size % step)
                ranges.append(((i.symbolic_min, maxb, step),
                               (maxb + 1, i.symbolic_max, i.symbolic_max - maxb)))

            # One call to the `efunc` for each combination of ranges
            calls = []
            for combo in product(*ranges):
                dynamic_args_mapper = {}
                for bi, (lower, upper, size) in zip(interb, combo):
                    dynamic_args_mapper[bi.dim] = (lower, upper)
                    dynamic_args_mapper[bi.dim.step] = (size,)
                calls.append(List(body=efunc.make_call(dynamic_args_mapper)))

            mapper[root] = List(body=calls)

        iet = Transformer(mapper).visit(iet)

        steps = [d.step for d in block_dims]
        return iet, {'dimensions': block_dims, 'efuncs': efuncs, 'args': steps}