Ejemplo n.º 1
0
    def callback(self, clusters, prefix):
        if not prefix:
            return clusters

        d = prefix[-1].dim

        subiters = flatten(
            [c.ispace.sub_iterators.get(d, []) for c in clusters])
        subiters = {i for i in subiters if i.is_Stepping}
        if not subiters:
            return clusters

        # Collect the index access functions along `d`, e.g., `t + 1` where `t` is
        # a SteppingDimension for `d = time`
        mapper = DefaultOrderedDict(lambda: DefaultOrderedDict(set))
        for c in clusters:
            indexeds = [
                a.indexed for a in c.scope.accesses if a.function.is_Tensor
            ]

            for i in indexeds:
                try:
                    iaf = i.indices[d]
                except KeyError:
                    continue

                # Sanity checks
                sis = iaf.free_symbols & subiters
                if len(sis) == 0:
                    continue
                elif len(sis) == 1:
                    si = sis.pop()
                else:
                    raise InvalidOperator(
                        "Cannot use multiple SteppingDimensions "
                        "to index into a Function")
                size = i.function.shape_allocated[d]
                assert is_integer(size)

                mapper[size][si].add(iaf)

        # Construct the ModuloDimensions
        mds = []
        for size, v in mapper.items():
            for si, iafs in list(v.items()):
                # Offsets are sorted so that the semantic order (t0, t1, t2) follows
                # SymPy's index ordering (t, t-1, t+1) afer modulo replacement so
                # that associativity errors are consistent. This corresponds to
                # sorting offsets {-1, 0, 1} as {0, -1, 1} assigning -inf to 0
                siafs = sorted(iafs,
                               key=lambda i: -np.inf
                               if i - si == 0 else (i - si))

                for iaf in siafs:
                    name = '%s%d' % (si.name, len(mds))
                    offset = uxreplace(iaf, {si: d.root})
                    mds.append(
                        ModuloDimension(name, si, offset, size, origin=iaf))

        # Replacement rule for ModuloDimensions
        def rule(size, e):
            try:
                return e.function.shape_allocated[d] == size
            except (AttributeError, KeyError):
                return False

        # Reconstruct the Clusters
        processed = []
        for c in clusters:
            # Apply substitutions to expressions
            # Note: In an expression, there could be `u[t+1, ...]` and `v[t+1,
            # ...]`, where `u` and `v` are TimeFunction with circular time
            # buffers (save=None) *but* different modulo extent. The `t+1`
            # indices above are therefore conceptually different, so they will
            # be replaced with the proper ModuloDimension through two different
            # calls to `xreplace_indices`
            exprs = c.exprs
            groups = as_mapper(mds, lambda d: d.modulo)
            for size, v in groups.items():
                mapper = {md.origin: md for md in v}

                func = partial(xreplace_indices,
                               mapper=mapper,
                               key=partial(rule, size))
                exprs = [e.apply(func) for e in exprs]

            # Augment IterationSpace
            ispace = IterationSpace(c.ispace.intervals, {
                **c.ispace.sub_iterators,
                **{
                    d: tuple(mds)
                }
            }, c.ispace.directions)

            processed.append(c.rebuild(exprs=exprs, ispace=ispace))

        return processed
Ejemplo n.º 2
0
def linearize_accesses(iet, key, cache, sregistry):
    """
    Turn Indexeds into FIndexeds and create the necessary access Macros.
    """
    # `functions` are all Functions that `iet` may need to linearize
    functions = [f for f in FindSymbols().visit(iet) if key(f) and f.ndim > 1]
    functions = sorted(functions, key=lambda f: len(f.dimensions), reverse=True)

    # `functions_unseen` are all Functions that `iet` may need to linearize
    # and have not been seen while processing other IETs
    functions_unseen = [f for f in functions if f not in cache]

    # Find unique sizes (unique -> minimize necessary registers)
    mapper = DefaultOrderedDict(list)
    for f in functions:
        # NOTE: the outermost dimension is unnecessary
        for d in f.dimensions[1:]:
            # TODO: same grid + same halo => same padding, however this is
            # never asserted throughout the compiler yet... maybe should do
            # it when in debug mode at `prepare_arguments` time, ie right
            # before jumping to C?
            mapper[(d, f._size_halo[d], getattr(f, 'grid', None))].append(f)

    # For all unseen Functions, build the size exprs. For example:
    # `x_fsz0 = u_vec->size[1]`
    imapper = DefaultOrderedDict(dict)
    for (d, halo, _), v in mapper.items():
        v_unseen = [f for f in v if f in functions_unseen]
        if not v_unseen:
            continue
        expr = _generate_fsz(v_unseen[0], d, sregistry)
        if expr:
            for f in v_unseen:
                imapper[f][d] = expr.write
                cache[f].stmts0.append(expr)

    # For all unseen Functions, build the stride exprs. For example:
    # `y_stride0 = y_fsz0*z_fsz0`
    built = {}
    mapper = DefaultOrderedDict(dict)
    for f, v in imapper.items():
        for d in v:
            n = f.dimensions.index(d)
            expr = prod(v[i] for i in f.dimensions[n:])
            try:
                stmt = built[expr]
            except KeyError:
                name = sregistry.make_name(prefix='%s_stride' % d.name)
                s = Symbol(name=name, dtype=np.int64, is_const=True)
                stmt = built[expr] = DummyExpr(s, expr, init=True)
            mapper[f][d] = stmt.write
            cache[f].stmts1.append(stmt)
    mapper.update([(f, {}) for f in functions_unseen if f not in mapper])

    # For all unseen Functions, build defines. For example:
    # `#define uL(t, x, y, z) u[(t)*t_stride0 + (x)*x_stride0 + (y)*y_stride0 + (z)]`
    headers = []
    findexeds = {}
    for f in functions:
        if cache[f].cbk is None:
            header, cbk = _generate_macro(f, mapper[f], sregistry)
            headers.append(header)
            cache[f].cbk = findexeds[f] = cbk
        else:
            findexeds[f] = cache[f].cbk

    # Build "functional" Indexeds. For example:
    # `u[t2, x+8, y+9, z+7] => uL(t2, x+8, y+9, z+7)`
    mapper = {}
    indexeds = FindSymbols('indexeds').visit(iet)
    for i in indexeds:
        try:
            mapper[i] = findexeds[i.function](i)
        except KeyError:
            pass

    # Introduce the linearized expressions
    iet = Uxreplace(mapper).visit(iet)

    # All Functions that actually require linearization in `iet`
    candidates = []

    candidates.extend(filter_ordered(i.function for i in indexeds))

    calls = FindNodes(Call).visit(iet)
    cfuncs = filter_ordered(flatten(i.functions for i in calls))
    candidates.extend(i for i in cfuncs if i.function.is_DiscreteFunction)

    # All Functions that can be linearized in `iet`
    defines = FindSymbols('defines-aliases').visit(iet)

    # Place the linearization expressions or delegate to ancestor efunc
    stmts0 = []
    stmts1 = []
    args = []
    for f in candidates:
        if f in defines:
            stmts0.extend(cache[f].stmts0)
            stmts1.extend(cache[f].stmts1)
        else:
            args.extend([e.write for e in cache[f].stmts1])
    if stmts0:
        assert len(stmts1) > 0
        stmts0 = filter_ordered(stmts0) + [BlankLine]
        stmts1 = filter_ordered(stmts1) + [BlankLine]
        body = iet.body._rebuild(body=tuple(stmts0) + tuple(stmts1) + iet.body.body)
        iet = iet._rebuild(body=body)
    else:
        assert len(stmts0) == 0

    return iet, headers, args
Ejemplo n.º 3
0
    def __init__(self, function, contracted_dims, accessv, n, async_degree):
        self.function = function
        self.accessv = accessv

        contraction_mapper = {}
        index_mapper = {}
        dims = list(function.dimensions)
        for d in contracted_dims:
            assert d in function.dimensions

            # Determine the buffer size along `d`
            indices = filter_ordered(i.indices[d] for i in accessv.accesses)
            slots = [i.xreplace({d: 0, d.spacing: 1}) for i in indices]
            size = max(slots) - min(slots) + 1

            if async_degree is not None:
                if async_degree < size:
                    warning("Ignoring provided asynchronous degree as it'd be "
                            "too small for the required buffer (provided %d, "
                            "but need at least %d for `%s`)"
                            % (async_degree, size, function.name))
                else:
                    size = async_degree

            # Replace `d` with a suitable CustomDimension
            bd = CustomDimension('db%d' % n, 0, size-1, size, d)
            contraction_mapper[d] = dims[dims.index(d)] = bd

            if size > 1:
                # Create the necessary SteppingDimensions for indexing
                sd = SteppingDimension(name='sb%d' % n, parent=bd)
                index_mapper.update({i: i.xreplace({d: sd}) for i in indices})
            else:
                # Special case, no need to keep a SteppingDimension around
                index_mapper.update({i: 0 for i in indices})

        self.contraction_mapper = contraction_mapper
        self.index_mapper = index_mapper

        # Track the SubDimensions used to index into `function`
        subdims_mapper = DefaultOrderedDict(set)
        for e in accessv.mapper:
            try:
                # Case 1: implicitly via SubDomains
                m = {d.root: v for d, v in e.subdomain.dimension_map.items()}
            except AttributeError:
                # Case 2: explicitly via the lower-level SubDimension API
                m = {i.root: i for i in e.free_symbols
                     if isinstance(i, Dimension) and (i.is_Sub or not i.is_Derived)}
            for d, v in m.items():
                subdims_mapper[d].add(v)
        if any(len(v) > 1 for v in subdims_mapper.values()):
            # Non-uniform SubDimensions. At this point we're going to raise
            # an exception. It's either illegal or still unsupported
            for v in subdims_mapper.values():
                for d0, d1 in combinations(v, 2):
                    if d0.overlap(d1):
                        raise InvalidOperator("Cannot apply `buffering` to `%s` as it "
                                              "is accessed over the overlapping "
                                              " SubDimensions `<%s, %s>`" %
                                              (function, d0, d1))
            self.subdims_mapper = None
            raise NotImplementedError("`buffering` does not support multiple "
                                      "non-overlapping SubDimensions yet.")
        else:
            self.subdims_mapper = {d: v.pop() for d, v in subdims_mapper.items()}

        self.buffer = Array(name='%sb' % function.name,
                            dimensions=dims,
                            dtype=function.dtype,
                            halo=function.halo,
                            space='mapped')
Ejemplo n.º 4
0
def linearize_accesses(iet, cache, sregistry):
    """
    Turn Indexeds into FIndexeds and create the necessary access Macros.
    """
    # Find all objects amenable to linearization
    symbol_names = {i.name for i in FindSymbols('indexeds').visit(iet)}
    functions = [f for f in FindSymbols().visit(iet)
                 if ((f.is_DiscreteFunction or f.is_Array) and
                     f.ndim > 1 and
                     f.name in symbol_names)]
    functions = sorted(functions, key=lambda f: len(f.dimensions), reverse=True)

    # Find unique sizes (unique -> minimize necessary registers)
    mapper = DefaultOrderedDict(list)
    for f in functions:
        if f not in cache:
            # NOTE: the outermost dimension is unnecessary
            for d in f.dimensions[1:]:
                # TODO: same grid + same halo => same padding, however this is
                # never asserted throughout the compiler yet... maybe should do
                # it when in debug mode at `prepare_arguments` time, ie right
                # before jumping to C?
                mapper[(d, f._size_halo[d], getattr(f, 'grid', None))].append(f)

    # Build all exprs such as `x_fsz0 = u_vec->size[1]`
    imapper = DefaultOrderedDict(list)
    for (d, halo, _), v in mapper.items():
        name = sregistry.make_name(prefix='%s_fsz' % d.name)
        s = Symbol(name=name, dtype=np.int32, is_const=True)
        try:
            expr = DummyExpr(s, v[0]._C_get_field(FULL, d).size, init=True)
        except AttributeError:
            assert v[0].is_Array
            expr = DummyExpr(s, v[0].symbolic_shape[d], init=True)
        for f in v:
            imapper[f].append((d, s))
            cache[f].stmts0.append(expr)

    # Build all exprs such as `y_slc0 = y_fsz0*z_fsz0`
    built = {}
    mapper = DefaultOrderedDict(list)
    for f, v in imapper.items():
        for n, (d, _) in enumerate(v):
            expr = prod(list(zip(*v[n:]))[1])
            try:
                stmt = built[expr]
            except KeyError:
                name = sregistry.make_name(prefix='%s_slc' % d.name)
                s = Symbol(name=name, dtype=np.int32, is_const=True)
                stmt = built[expr] = DummyExpr(s, expr, init=True)
            mapper[f].append(stmt.write)
            cache[f].stmts1.append(stmt)
    mapper.update([(f, []) for f in functions if f not in mapper])

    # Build defines. For example:
    # `define uL(t, x, y, z) u[(t)*t_slice_sz + (x)*x_slice_sz + (y)*y_slice_sz + (z)]`
    headers = []
    findexeds = {}
    for f, szs in mapper.items():
        if cache[f].cbk is not None:
            # Perhaps we've already built an access macro for `f` through another efunc
            findexeds[f] = cache[f].cbk
        else:
            assert len(szs) == len(f.dimensions) - 1
            pname = sregistry.make_name(prefix='%sL' % f.name)

            expr = sum([MacroArgument(d.name)*s for d, s in zip(f.dimensions, szs)])
            expr += MacroArgument(f.dimensions[-1].name)
            expr = Indexed(IndexedData(f.name, None, f), expr)
            define = DefFunction(pname, f.dimensions)
            headers.append((ccode(define), ccode(expr)))

            cache[f].cbk = findexeds[f] = lambda i, pname=pname: FIndexed(i, pname)

    # Build "functional" Indexeds. For example:
    # `u[t2, x+8, y+9, z+7] => uL(t2, x+8, y+9, z+7)`
    mapper = {}
    for n in FindNodes(Expression).visit(iet):
        subs = {}
        for i in retrieve_indexed(n.expr):
            try:
                subs[i] = findexeds[i.function](i)
            except KeyError:
                pass
        mapper[n] = n._rebuild(expr=uxreplace(n.expr, subs))

    # Put together all of the necessary exprs for `y_fsz0`, ..., `y_slc0`, ...
    stmts0 = filter_ordered(flatten(cache[f].stmts0 for f in functions))
    if stmts0:
        stmts0.append(BlankLine)
    stmts1 = filter_ordered(flatten(cache[f].stmts1 for f in functions))
    if stmts1:
        stmts1.append(BlankLine)

    iet = Transformer(mapper).visit(iet)
    body = iet.body._rebuild(body=tuple(stmts0) + tuple(stmts1) + iet.body.body)
    iet = iet._rebuild(body=body)

    return iet, headers
Ejemplo n.º 5
0
class Buffer(object):

    """
    A buffer with metadata attached.

    Parameters
    ----------
    function : DiscreteFunction
        The object for which the buffer is created.
    contracted_dims : list of Dimension
        The Dimensions in `function` to be contracted, that is to be replaced
        by ModuloDimensions.
    accessv : AccessValue
        All accesses involving `function`.
    options : dict, optional
        The compilation options. See `buffering.__doc__`.
    sregistry : SymbolRegistry
        The symbol registry, to create unique names for buffers and Dimensions.
    bds : dict, optional
        All CustomDimensions created to define buffer dimensions, potentially
        reusable in the creation of this buffer. The object gets updated if new
        CustomDimensions are created.
    mds : dict, optional
        All ModuloDimensions created to index into other buffers, potentially reusable
        for indexing into this buffer. The object gets updated if new ModuloDimensions
        are created.
    """

    def __init__(self, function, contracted_dims, accessv, options, sregistry,
                 bds=None, mds=None):
        # Parse compilation options
        async_degree = options['buf-async-degree']
        space = options['buf-mem-space']
        dtype = options['buf-dtype'](function)

        self.function = function
        self.accessv = accessv

        self.contraction_mapper = {}
        self.index_mapper = defaultdict(dict)
        self.sub_iterators = defaultdict(list)
        self.subdims_mapper = DefaultOrderedDict(set)

        # Create the necessary ModuloDimensions for indexing into the buffer
        # E.g., `u[time,x] + u[time+1,x] -> `ub[sb0,x] + ub[sb1,x]`, where `sb0`
        # and `sb1` are ModuloDimensions starting at `time` and `time+1` respectively
        dims = list(function.dimensions)
        for d in contracted_dims:
            assert d in function.dimensions

            # Determine the buffer size, and therefore the span of the ModuloDimension,
            # along the contracting Dimension `d`
            indices = filter_ordered(i.indices[d] for i in accessv.accesses)
            slots = [i.subs({d: 0, d.spacing: 1}) for i in indices]
            try:
                size = max(slots) - min(slots) + 1
            except TypeError:
                # E.g., special case `slots=[-1 + time/factor, 2 + time/factor]`
                # Resort to the fast vector-based comparison machinery (rather than
                # the slower sympy.simplify)
                slots = [Vector(i) for i in slots]
                size = int((vmax(*slots) - vmin(*slots) + 1)[0])

            if async_degree is not None:
                if async_degree < size:
                    warning("Ignoring provided asynchronous degree as it'd be "
                            "too small for the required buffer (provided %d, "
                            "but need at least %d for `%s`)"
                            % (async_degree, size, function.name))
                else:
                    size = async_degree

            # Replace `d` with a suitable CustomDimension `bd`
            name = sregistry.make_name(prefix='db')
            bd = bds.setdefault((d, size), CustomDimension(name, 0, size-1, size, d))
            self.contraction_mapper[d] = dims[dims.index(d)] = bd

            # Finally create the ModuloDimensions as children of `bd`
            if size > 1:
                # Note: indices are sorted so that the semantic order (sb0, sb1, sb2)
                # follows SymPy's index ordering (time, time-1, time+1) after modulo
                # replacement, so that associativity errors are consistent. This very
                # same strategy is also applied in clusters/algorithms/Stepper
                p, _ = offset_from_centre(d, indices)
                indices = sorted(indices,
                                 key=lambda i: -np.inf if i - p == 0 else (i - p))
                for i in indices:
                    name = sregistry.make_name(prefix='sb')
                    md = mds.setdefault((bd, i), ModuloDimension(name, bd, i, size))
                    self.index_mapper[d][i] = md
                    self.sub_iterators[d.root].append(md)
            else:
                assert len(indices) == 1
                self.index_mapper[d][indices[0]] = 0

        # Track the SubDimensions used to index into `function`
        for e in accessv.mapper:
            m = {i.root: i for i in e.free_symbols
                 if isinstance(i, Dimension) and (i.is_Sub or not i.is_Derived)}
            for d, v in m.items():
                self.subdims_mapper[d].add(v)
        if any(len(v) > 1 for v in self.subdims_mapper.values()):
            # Non-uniform SubDimensions. At this point we're going to raise
            # an exception. It's either illegal or still unsupported
            for v in self.subdims_mapper.values():
                for d0, d1 in combinations(v, 2):
                    if d0.overlap(d1):
                        raise InvalidOperator("Cannot apply `buffering` to `%s` as it "
                                              "is accessed over the overlapping "
                                              " SubDimensions `<%s, %s>`" %
                                              (function, d0, d1))
            raise NotImplementedError("`buffering` does not support multiple "
                                      "non-overlapping SubDimensions yet.")
        else:
            self.subdims_mapper = {d: v.pop() for d, v in self.subdims_mapper.items()}

        # Build and sanity-check the buffer IterationIntervals
        self.itintervals_mapper = {}
        for e in accessv.mapper:
            for i in e.ispace.itintervals:
                v = self.itintervals_mapper.setdefault(i.dim, i.args)
                if v != self.itintervals_mapper[i.dim]:
                    raise NotImplementedError("Cannot apply `buffering` as the buffered "
                                              "function `%s` is accessed over multiple, "
                                              "non-compatible iteration spaces along the "
                                              "Dimension `%s`" % (function.name, i.dim))
        # Also add IterationIntervals for initialization along `x`, should `xi` be
        # the only written Dimension in the `x` hierarchy
        for d, (interval, _, _) in list(self.itintervals_mapper.items()):
            for i in d._defines:
                self.itintervals_mapper.setdefault(i, (interval.relaxed, (), Forward))

        # Finally create the actual buffer
        self.buffer = Array(name=sregistry.make_name(prefix='%sb' % function.name),
                            dimensions=dims,
                            dtype=dtype,
                            halo=function.halo,
                            space=space)

    def __repr__(self):
        return "Buffer[%s,<%s>]" % (self.buffer.name,
                                    ','.join(str(i) for i in self.contraction_mapper))

    @property
    def size(self):
        return np.prod([v.symbolic_size for v in self.contraction_mapper.values()])

    @property
    def is_read(self):
        return self.accessv.is_read

    @property
    def is_readonly(self):
        return self.is_read and self.lastwrite is None

    @property
    def firstread(self):
        return self.accessv.firstread

    @property
    def lastwrite(self):
        return self.accessv.lastwrite

    @property
    def has_uniform_subdims(self):
        return self.subdims_mapper is not None

    @cached_property
    def indexed(self):
        return self.buffer.indexed

    @cached_property
    def index_mapper_flat(self):
        ret = {}
        for mapper in self.index_mapper.values():
            ret.update(mapper)
        return ret

    @cached_property
    def writeto(self):
        """
        The `writeto` IterationSpace, that is the iteration space that must be
        iterated over in order to initialize the buffer.
        """
        intervals = []
        sub_iterators = {}
        directions = {}
        for d in self.buffer.dimensions:
            try:
                interval, si, direction = self.itintervals_mapper[d]
            except KeyError:
                # E.g., the contraction Dimension `db0`
                assert d in self.contraction_mapper.values()
                interval, si, direction = Interval(d, 0, 0), (), Forward
            intervals.append(interval)
            sub_iterators[d] = si
            directions[d] = direction

        relations = (self.buffer.dimensions,)
        intervals = IntervalGroup(intervals, relations=relations)

        return IterationSpace(intervals, sub_iterators, directions)

    @cached_property
    def written(self):
        """
        The `written` IterationSpace, that is the iteration space that must be
        iterated over in order to read all of the written buffer values.
        """
        intervals = []
        sub_iterators = {}
        directions = {}
        for dd in self.function.dimensions:
            d = dd.xreplace(self.subdims_mapper)
            try:
                interval, si, direction = self.itintervals_mapper[d]
            except KeyError:
                # E.g., d=time_sub
                assert d.is_NonlinearDerived
                d = d.root
                interval, si, direction = self.itintervals_mapper[d]
            intervals.append(interval)
            sub_iterators[d] = si + as_tuple(self.sub_iterators[d])
            directions[d] = direction

        relations = (tuple(i.dim for i in intervals),)
        intervals = IntervalGroup(intervals, relations=relations)

        return IterationSpace(intervals, sub_iterators, directions)

    @cached_property
    def lastmap(self):
        """
        A mapper from contracted Dimensions to a 2-tuple of indices representing,
        respectively, the "last" write to the buffer and the "last" read from the
        buffered Function. For example, `{time: (sb1, time+1)}`.
        """
        mapper = {}
        for d, m in self.index_mapper.items():
            try:
                func = max if self.written.directions[d.root] is Forward else min
                v = func(m)
            except TypeError:
                func = vmax if self.written.directions[d.root] is Forward else vmin
                v = func(*[Vector(i) for i in m])[0]
            mapper[d] = Map(m[v], v)

        return mapper

    @cached_property
    def initmap(self):
        """
        A mapper from contracted Dimensions to indices representing the min points
        for buffer initialization. For example, in the case of a forward-propagating
        `time` Dimension, we could have `{time: (time_m + db0) % 2, (time_m + db0)}`;
        likewise, for backwards, `{time: (time_M - 2 + db0) % 4, time_M - 2 + db0}`.
        """
        mapper = {}
        for d, bd in self.contraction_mapper.items():
            indices = list(self.index_mapper[d])

            # The buffer is initialized at `d_m(d_M) - offset`. E.g., a buffer with
            # six slots, used to replace a buffered Function accessed at `d-3`, `d`
            # and `d + 2`, will have `offset = 3`
            p, offset = offset_from_centre(d, indices)

            if self.written.directions[d.root] is Forward:
                v = p.subs(d.root, d.root.symbolic_min) - offset + bd
            else:
                v = p.subs(d.root, d.root.symbolic_max) - offset + bd

            mapper[d] = Map(v % bd.symbolic_size, v)

        return mapper