Example #1
def offset_from_centre(d, indices):
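    """
    Return a 2-tuple `(p, offset)`: `p` is the centre of `indices` along the
    Dimension `d` (e.g., `time` given `indices=[time-1, time+1]`), while
    `offset` is the distance of `p` from the smallest index.
    """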
    if d in indices:
        p = d
        offset = d - min(indices)
        assert is_integer(offset)

    elif len(indices) == 1:
        p = indices[0]
        offset = 0

    else:
        # E.g., `time/factor-1` and `time/factor+1` are present among the
        # indices in `index_mapper`, but `time/factor` itself is not. We
        # reconstruct `time/factor` -- the starting point, at `time_m` or `time_M`
        assert len(indices) > 0
        v = indices[0]

        try:
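            # For an index like `time/factor - 1`, SymPy orders the numeric
            # term first in `v.args`, so this strips the constant shift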
            p = sum(v.args[1:])
            if not ((p - v).is_Integer or (p - v).is_Symbol):
                raise ValueError
        except (IndexError, ValueError):
            raise NotImplementedError("Cannot apply buffering with nonlinear "
                                      "index functions (found `%s`)" % v)
        try:
            # Start assuming e.g. `indices = [time - 1, time + 2]`
            offset = p - min(indices)
        except TypeError:
            # Actually, e.g. `indices = [time/factor - 1, time/factor + 2]`
            offset = p - vmin(*[Vector(i) for i in indices])[0]

    return p, offset
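
A minimal sketch of the two main code paths above, using plain SymPy symbols
in place of Devito Dimensions (an assumption for illustration; the real
arguments are a Dimension and its indexing expressions):

from sympy import Symbol

time, factor = Symbol('time'), Symbol('factor')

# First branch: the centre `time` is itself among the indices
indices = [time - 1, time, time + 1]
p = time
offset = p - min(indices)   # SymPy can order indices differing by integers
assert offset == 1

# Third branch: only shifted indices are present, so the centre is
# reconstructed; for `v = time/factor - 1`, `v.args` is `(-1, time/factor)`
indices = [time/factor - 1, time/factor + 1]
v = indices[0]
p = sum(v.args[1:])
assert p == time/factor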
Example #2
    def lastmap(self):
        """
        A mapper from contracted Dimensions to a 2-tuple of indices representing,
        respectively, the "last" write to the buffer and the "last" read from the
        buffered Function. For example, `{time: (sb1, time+1)}`.
        """
        mapper = {}
        for d, m in self.index_mapper.items():
            try:
                func = max if self.written.directions[d.root] is Forward else min
                v = func(m)
            except TypeError:
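                # E.g., `m` contains indices such as `time/factor - 1`, which
                # `max`/`min` cannot order directly; resort to the Vector-based
                # comparison machinery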
                func = vmax if self.written.directions[d.root] is Forward else vmin
                v = func(*[Vector(i) for i in m])[0]
            mapper[d] = Map(m[v], v)

        return mapper
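
For instance, for a two-slot buffer along `time` and a Forward write
direction, the docstring's example can be reproduced with a toy mapper
(`Map`'s field names are assumed here; plain SymPy symbols stand in for the
ModuloDimensions):

from collections import namedtuple
from sympy import Symbol

Map = namedtuple('Map', 'b f')   # (buffer index, function index) -- assumed layout
time = Symbol('time')
sb0, sb1 = Symbol('sb0'), Symbol('sb1')

m = {time: sb0, time + 1: sb1}   # an index_mapper entry for a 2-slot buffer

v = max(m)                       # Forward => "last" means the largest index
assert Map(m[v], v) == Map(sb1, time + 1)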
Example #3
def actions_from_update_memcpy(cluster, clusters, prefix, actions):
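    """
    Turn a Cluster performing a memcpy-based fetch into a prefetch Cluster,
    attaching a PrefetchUpdate SyncOp to it, a WaitPrefetch SyncOp to the
    first Cluster reading `target`, and dropping the original Cluster.
    """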
    it = prefix[-1]
    d = it.dim
    direction = it.direction

    # Prepare the data to instantiate a PrefetchUpdate SyncOp
    e = cluster.exprs[0]

    size = 1

    function = e.rhs.function
    fetch = e.rhs.indices[d]
    ifetch = fetch.subs(d, d.symbolic_min)
    fcond = None

    if direction is Forward:
        pfetch = fetch + 1
    else:
        pfetch = fetch - 1
    pcond = None

    target = e.lhs.function
    tstore0 = e.lhs.indices[d]

    # If fetching into e.g. `ub[sb1]`, we'll need to prefetch into e.g. `ub[sb0]`
    if is_integer(tstore0):
        tstore = tstore0
    else:
        assert tstore0.is_Modulo
        subiters = [
            md for md in cluster.sub_iterators[d]
            if md.parent is tstore0.parent
        ]
        osubiters = sorted(subiters, key=lambda i: Vector(i.offset))
        n = osubiters.index(tstore0)
        if direction is Forward:
            tstore = osubiters[(n + 1) % len(osubiters)]
        else:
            tstore = osubiters[(n - 1) % len(osubiters)]

    # Turn `cluster` into a prefetch Cluster
    expr = uxreplace(e, {tstore0: tstore, fetch: pfetch})

    guards = {
        d:
        And(
            cluster.guards.get(d, True),
            GuardBoundNext(function.indices[d], direction),
        )
    }

    syncs = {
        d: [
            PrefetchUpdate(d, size, function, fetch, ifetch, fcond, pfetch,
                           pcond, target, tstore)
        ]
    }

    pcluster = cluster.rebuild(exprs=expr, guards=guards, syncs=syncs)

    # Since we're turning `e` into a prefetch, we need to:
    # 1) attach a WaitPrefetch SyncOp to the first Cluster accessing `target`
    # 2) insert the prefetch Cluster right after the last Cluster accessing `target`
    # 3) drop the original Cluster performing a memcpy-based fetch
    n = clusters.index(cluster)
    first = None
    last = None
    for c in clusters[n + 1:]:
        if target in c.scope.reads:
            if first is None:
                first = c
            last = c
    assert first is not None
    assert last is not None
    actions[first].syncs[d].append(
        WaitPrefetch(d, size, function, fetch, ifetch, fcond, pfetch, pcond,
                     target, tstore))
    actions[last].insert.append(pcluster)
    actions[cluster].drop = True

    return last, pcluster
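
The slot rotation above -- move to the ModuloDimension one position over,
wrapping around at either end -- can be sketched in isolation (plain strings
stand in for the sorted ModuloDimensions; an illustration, not Devito API):

def next_slot(osubiters, current, forward=True):
    # Rotate to the adjacent buffer slot, wrapping around via modulo arithmetic
    n = osubiters.index(current)
    return osubiters[(n + (1 if forward else -1)) % len(osubiters)]

slots = ['sb0', 'sb1', 'sb2']
assert next_slot(slots, 'sb2') == 'sb0'                  # Forward wraps to front
assert next_slot(slots, 'sb0', forward=False) == 'sb2'   # Backward wraps to back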
Example #4
    def __init__(self, function, contracted_dims, accessv, options, sregistry,
                 bds=None, mds=None):
        # Parse compilation options
        async_degree = options['buf-async-degree']
        space = options['buf-mem-space']
        dtype = options['buf-dtype'](function)

        self.function = function
        self.accessv = accessv

        self.contraction_mapper = {}
        self.index_mapper = defaultdict(dict)
        self.sub_iterators = defaultdict(list)
        self.subdims_mapper = DefaultOrderedDict(set)

        # Create the necessary ModuloDimensions for indexing into the buffer
        # E.g., `u[time,x] + u[time+1,x]` -> `ub[sb0,x] + ub[sb1,x]`, where `sb0`
        # and `sb1` are ModuloDimensions starting at `time` and `time+1` respectively
        dims = list(function.dimensions)
        for d in contracted_dims:
            assert d in function.dimensions

            # Determine the buffer size, and therefore the span of the ModuloDimension,
            # along the contracting Dimension `d`
            indices = filter_ordered(i.indices[d] for i in accessv.accesses)
            slots = [i.subs({d: 0, d.spacing: 1}) for i in indices]
            try:
                size = max(slots) - min(slots) + 1
            except TypeError:
                # E.g., special case `slots=[-1 + time/factor, 2 + time/factor]`
                # Resort to the fast vector-based comparison machinery (rather than
                # the slower sympy.simplify)
                slots = [Vector(i) for i in slots]
                size = int((vmax(*slots) - vmin(*slots) + 1)[0])

            if async_degree is not None:
                if async_degree < size:
                    warning("Ignoring provided asynchronous degree as it'd be "
                            "too small for the required buffer (provided %d, "
                            "but need at least %d for `%s`)"
                            % (async_degree, size, function.name))
                else:
                    size = async_degree

            # Replace `d` with a suitable CustomDimension `bd`. Note that `bds`
            # memoizes CustomDimensions across Buffers, so equally-sized buffers
            # along `d` share the same `bd`
            name = sregistry.make_name(prefix='db')
            bd = bds.setdefault((d, size), CustomDimension(name, 0, size-1, size, d))
            self.contraction_mapper[d] = dims[dims.index(d)] = bd

            # Finally create the ModuloDimensions as children of `bd`
            if size > 1:
                # Note: indices are sorted so that the semantic order (sb0, sb1, sb2)
                # follows SymPy's index ordering (time, time-1, time+1) after modulo
                # replacement, so that associativity errors are consistent. This very
                # same strategy is also applied in clusters/algorithms/Stepper
                p, _ = offset_from_centre(d, indices)
                indices = sorted(indices,
                                 key=lambda i: -np.inf if i - p == 0 else (i - p))
                for i in indices:
                    name = sregistry.make_name(prefix='sb')
                    md = mds.setdefault((bd, i), ModuloDimension(name, bd, i, size))
                    self.index_mapper[d][i] = md
                    self.sub_iterators[d.root].append(md)
            else:
                assert len(indices) == 1
                self.index_mapper[d][indices[0]] = 0

        # Track the SubDimensions used to index into `function`
        for e in accessv.mapper:
            m = {i.root: i for i in e.free_symbols
                 if isinstance(i, Dimension) and (i.is_Sub or not i.is_Derived)}
            for d, v in m.items():
                self.subdims_mapper[d].add(v)
        if any(len(v) > 1 for v in self.subdims_mapper.values()):
            # Non-uniform SubDimensions. At this point we're going to raise
            # an exception. It's either illegal or still unsupported
            for v in self.subdims_mapper.values():
                for d0, d1 in combinations(v, 2):
                    if d0.overlap(d1):
                        raise InvalidOperator("Cannot apply `buffering` to `%s` as it "
                                              "is accessed over the overlapping "
                                              "SubDimensions `<%s, %s>`" %
                                              (function, d0, d1))
            raise NotImplementedError("`buffering` does not support multiple "
                                      "non-overlapping SubDimensions yet.")
        else:
            self.subdims_mapper = {d: v.pop() for d, v in self.subdims_mapper.items()}

        # Build and sanity-check the buffer IterationIntervals
        self.itintervals_mapper = {}
        for e in accessv.mapper:
            for i in e.ispace.itintervals:
                v = self.itintervals_mapper.setdefault(i.dim, i.args)
                if v != self.itintervals_mapper[i.dim]:
                    raise NotImplementedError("Cannot apply `buffering` as the buffered "
                                              "function `%s` is accessed over multiple, "
                                              "non-compatible iteration spaces along the "
                                              "Dimension `%s`" % (function.name, i.dim))
        # Also add IterationIntervals for initialization along `x`, should `xi` be
        # the only written Dimension in the `x` hierarchy
        for d, (interval, _, _) in list(self.itintervals_mapper.items()):
            for i in d._defines:
                self.itintervals_mapper.setdefault(i, (interval.relaxed, (), Forward))

        # Finally create the actual buffer
        self.buffer = Array(name=sregistry.make_name(prefix='%sb' % function.name),
                            dimensions=dims,
                            dtype=dtype,
                            halo=function.halo,
                            space=space)
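
A minimal sketch of the buffer-size computation above, with plain SymPy
symbols standing in for the contracted Dimension `d` and its spacing (both
are assumptions for illustration):

from sympy import Symbol

time, h_time = Symbol('time'), Symbol('h_time')   # h_time plays d.spacing

indices = [time - h_time, time, time + h_time]
slots = [i.subs({time: 0, h_time: 1}) for i in indices]   # -> [-1, 0, 1]
size = max(slots) - min(slots) + 1                        # three buffer slots
assert size == 3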