Example #1
    def _eliminate_inter_stencil_redundancies(self, cluster, template,
                                              **kwargs):
        """
        Search for aliasing expressions and capture them into vector temporaries.

        Examples
        --------
        1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
           >>>
           ti[x,y,z] = a[x,y,z] + b[x,y,z]
           temp = ti[x,y,z]*c[t,x,y,z]

        2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
           temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
           >>>
           ti[x,y,z] = a[x,y,z]*b[x,y,z]
           temp1 = 2.0*ti[x,y,z]
           temp2 = 3.0*ti[x,y,z+1]
        """
        # For more information about "aliases", refer to collect.__doc__
        aliases = collect(cluster.exprs)

        # Redundancies will be stored in space-varying temporaries
        graph = FlowGraph(cluster.exprs)
        time_invariants = {
            v.rhs: graph.time_invariant(v)
            for v in graph.values()
        }

        # Find the candidate expressions
        processed = []
        candidates = OrderedDict()
        for k, v in graph.items():
            # Cost check (to keep the memory footprint under control)
            naliases = len(aliases.get(v.rhs))
            cost = estimate_cost(v, True) * naliases
            test0 = lambda: cost >= self.MIN_COST_ALIAS and naliases > 1
            test1 = lambda: cost >= self.MIN_COST_ALIAS_INV and time_invariants[v.rhs]
            if test0() or test1():
                candidates[v.rhs] = k
            else:
                processed.append(v)

        # Create alias Clusters and all necessary substitution rules
        # for the new temporaries
        alias_clusters = []
        subs = {}
        for origin, alias in aliases.items():
            if all(i not in candidates for i in alias.aliased):
                continue

            # The write-to Intervals
            writeto = [
                Interval(i.dim, *alias.relaxed_diameter.get(i.dim, (0, 0)))
                for i in cluster.ispace.intervals if not i.dim.is_Time
            ]
            writeto = IntervalGroup(writeto)

            # Optimization: no need to retain a SpaceDimension if it does not
            # induce a flow/anti dependence (below, `i.offsets` captures this, by
            # telling how much halo will be needed to honour such dependences)
            dep_inducing = [i for i in writeto if any(i.offsets)]
            try:
                index = writeto.index(dep_inducing[0])
                writeto = IntervalGroup(writeto[index:])
            except IndexError:
                warning("Couldn't optimize some of the detected redundancies")

            # Create a temporary to store `alias`
            dimensions = [d.root for d in writeto.dimensions]
            halo = [(abs(i.lower), abs(i.upper)) for i in writeto]
            array = Array(name=template(),
                          dimensions=dimensions,
                          halo=halo,
                          dtype=cluster.dtype)

            # Build up the expression evaluating `alias`
            access = tuple(i.dim - i.lower for i in writeto)
            expression = Eq(array[access], origin)

            # Create the substitution rules so that we can use the newly created
            # temporary in place of the aliasing expressions
            for aliased, distance in alias.with_distance:
                assert all(i.dim in distance.labels for i in writeto)
                access = [i.dim - i.lower + distance[i.dim] for i in writeto]
                if aliased in candidates:
                    # It would *not* be in `candidates` if part of a composite alias
                    subs[candidates[aliased]] = array[access]
                subs[aliased] = array[access]

            # Construct the `alias` IterationSpace
            intervals, sub_iterators, directions = cluster.ispace.args
            ispace = IterationSpace(intervals.add(writeto), sub_iterators,
                                    directions)

            # Construct the `alias` DataSpace
            mapper = detect_accesses(expression)
            parts = {
                k: IntervalGroup(build_intervals(v)).add(ispace.intervals)
                for k, v in mapper.items() if k
            }
            dspace = DataSpace(cluster.dspace.intervals, parts)

            # Create a new Cluster for `alias`
            alias_clusters.append(Cluster([expression], ispace, dspace))

        # Switch temporaries in the expression trees
        processed = [e.xreplace(subs) for e in processed]

        return alias_clusters + [cluster.rebuild(processed)]
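
For context, the capture in example 2) of the docstring can be mimicked with plain SymPy. The sketch below is illustrative only: `a`, `b` and `ti` are hypothetical `IndexedBase` objects and `mapper` stands in for the substitution rules built above (the real pass also emits the defining equation for `ti` as a separate Cluster).

from sympy import IndexedBase, symbols

x, y, z = symbols('x y z')
a, b, ti = IndexedBase('a'), IndexedBase('b'), IndexedBase('ti')

temp1 = 2.0*a[x, y, z]*b[x, y, z]
temp2 = 3.0*a[x, y, z + 1]*b[x, y, z + 1]

# Capture the common product once per shifted occurrence
mapper = {a[x, y, z]*b[x, y, z]: ti[x, y, z],
          a[x, y, z + 1]*b[x, y, z + 1]: ti[x, y, z + 1]}

print(temp1.subs(mapper))  # 2.0*ti[x, y, z]
print(temp2.subs(mapper))  # 3.0*ti[x, y, z + 1]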
Example #2
    def _eliminate_inter_stencil_redundancies(self, cluster, template,
                                              **kwargs):
        """
        Search for redundancies across the expressions and expose them
        to the later stages of the optimisation pipeline by introducing
        new temporaries of suitable rank.

        Two types of redundancies are sought:

            * Time-invariants, and
            * Across different space points

        Examples
        ========
        Let ``t`` be the time dimension, ``x, y, z`` the space dimensions. Then:

        1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
           >>>
           ti[x,y,z] = a[x,y,z] + b[x,y,z]
           temp = ti[x,y,z]*c[t,x,y,z]

        2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
           temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
           >>>
           ti[x,y,z] = a[x,y,z]*b[x,y,z]
           temp1 = 2.0*ti[x,y,z]
           temp2 = 3.0*ti[x,y,z+1]
        """
        if cluster.is_sparse:
            return cluster

        # For more information about "aliases", refer to collect.__doc__
        mapper, aliases = collect(cluster.exprs)

        # Redundancies will be stored in space-varying temporaries
        g = cluster.trace
        indices = g.space_indices
        time_invariants = {v.rhs: g.time_invariant(v) for v in g.values()}

        # Find the candidate expressions
        processed = []
        candidates = OrderedDict()
        for k, v in g.items():
            # Cost check (to keep the memory footprint under control)
            naliases = len(mapper.get(v.rhs, []))
            cost = estimate_cost(v, True) * naliases
            if cost >= self.thresholds['min-cost-alias'] and \
                    (naliases > 1 or time_invariants[v.rhs]):
                candidates[v.rhs] = k
            else:
                processed.append(v)

        # Create alias Clusters and all necessary substitution rules
        # for the new temporaries
        alias_clusters = ClusterGroup()
        rules = OrderedDict()
        for origin, alias in aliases.items():
            if all(i not in candidates for i in alias.aliased):
                continue
            # Construct an iteration space suitable for /alias/
            intervals, sub_iterators, directions = cluster.ispace.args
            intervals = [
                Interval(i.dim, *alias.relaxed_diameter.get(i.dim, i.limits))
                for i in cluster.ispace.intervals
            ]
            ispace = IterationSpace(intervals, sub_iterators, directions)

            # Optimization: perhaps we can lift the cluster outside the time dimension
            if all(time_invariants[i] for i in alias.aliased):
                ispace = ispace.project(lambda i: not i.is_Time)

            # Build a symbolic function for /alias/
            intervals = ispace.intervals
            halo = [(abs(intervals[i].lower), abs(intervals[i].upper))
                    for i in indices]
            function = Array(name=template(), dimensions=indices, halo=halo)
            access = tuple(i - intervals[i].lower for i in indices)
            expression = Eq(Indexed(function.indexed, *access), origin)

            # Construct a data space suitable for /alias/
            mapper = detect_accesses(expression)
            parts = {
                k: IntervalGroup(build_intervals(v)).add(intervals)
                for k, v in mapper.items() if k
            }
            dspace = DataSpace([i.zero() for i in intervals], parts)

            # Create a new Cluster for /alias/
            alias_clusters.append(Cluster([expression], ispace, dspace))

            # Add substitution rules
            for aliased, distance in alias.with_distance:
                access = [
                    i - intervals[i].lower + j for i, j in distance
                    if i in indices
                ]
                temporary = Indexed(function.indexed, *tuple(access))
                rules[candidates[aliased]] = temporary
                rules[aliased] = temporary

        # Group clusters together if possible
        alias_clusters = groupby(alias_clusters).finalize()
        alias_clusters.sort(key=lambda i: i.is_dense)

        # Switch temporaries in the expression trees
        processed = [e.xreplace(rules) for e in processed]

        return alias_clusters + [cluster.rebuild(processed)]
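
The `ispace.project(lambda i: not i.is_Time)` step above is what lets a fully time-invariant alias be computed outside the time loop. A toy NumPy rendition of the payoff (hypothetical shapes, no Devito types):

import numpy as np

nt, n = 10, 100
a, b = np.random.rand(n), np.random.rand(n)
c = np.random.rand(nt, n)

# Before: the time-invariant sum is recomputed at every time step
out0 = np.empty((nt, n))
for t in range(nt):
    out0[t] = (a + b)*c[t]

# After: `ti` plays the role of the lifted temporary
ti = a + b
out1 = np.empty((nt, n))
for t in range(nt):
    out1[t] = ti*c[t]

assert np.allclose(out0, out1)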
Example #3
    def _eliminate_inter_stencil_redundancies(self, cluster, template,
                                              **kwargs):
        """
        Search for redundancies across the expressions and expose them
        to the later stages of the optimisation pipeline by introducing
        new temporaries of suitable rank.

        Two types of redundancies are sought:

            * Time-invariants, and
            * Across different space points

        Examples
        ========
        Let ``t`` be the time dimension, ``x, y, z`` the space dimensions. Then:

        1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
           >>>
           ti[x,y,z] = a[x,y,z] + b[x,y,z]
           temp = ti[x,y,z]*c[t,x,y,z]

        2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
           temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
           >>>
           ti[x,y,z] = a[x,y,z]*b[x,y,z]
           temp1 = 2.0*ti[x,y,z]
           temp2 = 3.0*ti[x,y,z+1]
        """
        if cluster.is_sparse:
            return cluster

        # For more information about "aliases", refer to collect.__doc__
        mapper, aliases = collect(cluster.exprs)

        # Redundancies will be stored in space-varying temporaries
        g = cluster.trace
        indices = g.space_indices
        time_invariants = {v.rhs: g.time_invariant(v) for v in g.values()}

        # Template for captured redundancies
        shape = tuple(i.symbolic_extent for i in indices)
        make = lambda i: Array(
            name=template(i), shape=shape, dimensions=indices).indexed

        # Find the candidate expressions
        processed = []
        candidates = OrderedDict()
        for k, v in g.items():
            # Cost check (to keep the memory footprint under control)
            naliases = len(mapper.get(v.rhs, []))
            cost = estimate_cost(v, True) * naliases
            if cost >= self.thresholds['min-cost-alias'] and \
                    (naliases > 1 or time_invariants[v.rhs]):
                candidates[v.rhs] = k
            else:
                processed.append(Eq(k, v.rhs))

        # Create alias Clusters and all necessary substitution rules
        # for the new temporaries
        alias_clusters = ClusterGroup()
        rules = OrderedDict()
        for c, (origin, alias) in enumerate(aliases.items()):
            if all(i not in candidates for i in alias.aliased):
                continue
            function = make(c)
            # Build new Cluster
            expression = Eq(Indexed(function, *indices), origin)
            intervals, sub_iterators, directions = cluster.ispace.args
            # Adjust intervals
            intervals = intervals.subtract(
                alias.anti_stencil.boxify().negate())
            if all(time_invariants[i] for i in alias.aliased):
                intervals = intervals.drop(
                    [i for i in intervals.dimensions if i.is_Time])
            ispace = IterationSpace(intervals, sub_iterators, directions)
            alias_clusters.append(Cluster([expression], ispace))
            # Update substitution rules
            for aliased, distance in alias.with_distance:
                coordinates = [i + j for i, j in distance.items() if i in indices]
                temporary = Indexed(function, *tuple(coordinates))
                rules[candidates[aliased]] = temporary
                rules[aliased] = temporary
        alias_clusters = groupby(alias_clusters).finalize()
        alias_clusters.sort(key=lambda i: i.is_dense)

        # Switch temporaries in the expression trees
        processed = [e.xreplace(rules) for e in processed]

        return alias_clusters + [cluster.rebuild(processed)]
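
The candidate selection shared by the three versions above encodes a flops-vs-memory trade-off: a temporary is worthwhile only if the operations saved across all aliasing occurrences beat a threshold. A distilled sketch, with a hypothetical threshold in place of `self.thresholds['min-cost-alias']` and a plain number in place of `estimate_cost`:

MIN_COST_ALIAS = 10  # hypothetical threshold

def is_candidate(cost, naliases, time_invariant):
    # Worth a temporary only if the saved flops (cost times number of
    # aliasing occurrences) beat the threshold, and the capture is either
    # reused across space points or hoistable out of the time loop
    return cost*naliases >= MIN_COST_ALIAS and (naliases > 1 or time_invariant)

assert is_candidate(cost=6, naliases=2, time_invariant=False)      # reused
assert not is_candidate(cost=6, naliases=1, time_invariant=False)  # single use
assert is_candidate(cost=12, naliases=1, time_invariant=True)      # hoistable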
Example #4
def lower_schedule(schedule, meta, sregistry, ftemps):
    """
    Turn a Schedule into a sequence of Clusters.
    """
    if ftemps:
        make = TempFunction
    else:
        # Typical case -- the user does *not* "see" the CIRE-created temporaries
        make = Array

    clusters = []
    subs = {}
    for pivot, writeto, ispace, aliaseds, indicess, _ in schedule:
        name = sregistry.make_name()
        dtype = meta.dtype

        if writeto:
            # The Dimensions defining the shape of Array
            # Note: with SubDimensions, we may have the following situation:
            #
            # for zi = z_m + zi_ltkn; zi <= z_M - zi_rtkn; ...
            #   r[zi] = ...
            #
            # Instead of `r[zi - z_m - zi_ltkn]` we have just `r[zi]`, so we'll need
            # as much room as in `zi`'s parent to avoid going OOB
            # Aside from generating ugly code, the reason we do not instead shift
            # the indices is that it would prevent future passes from transforming
            # the loop bounds (e.g., MPI's computation/communication overlap does that)
            dimensions = [
                d.parent if d.is_Sub else d for d in writeto.itdimensions
            ]

            # The halo must be set according to the size of writeto space
            halo = [(abs(i.lower), abs(i.upper)) for i in writeto]

            # The indices used to write into the Array
            indices = []
            for i in writeto:
                try:
                    # E.g., `xs`
                    sub_iterators = writeto.sub_iterators[i.dim]
                    assert len(sub_iterators) == 1
                    indices.append(sub_iterators[0])
                except KeyError:
                    # E.g., `z` -- a non-shifted Dimension
                    indices.append(i.dim - i.lower)

            obj = make(name=name,
                       dimensions=dimensions,
                       halo=halo,
                       dtype=dtype)
            expression = Eq(obj[indices], uxreplace(pivot, subs))

            callback = lambda idx: obj[idx]
        else:
            # Degenerate case: scalar expression
            assert writeto.size == 0

            obj = Symbol(name=name, dtype=dtype)
            expression = Eq(obj, uxreplace(pivot, subs))

            callback = lambda idx: obj

        # Create the substitution rules for the aliasing expressions
        subs.update({
            aliased: callback(indices)
            for aliased, indices in zip(aliaseds, indicess)
        })

        # Construct the alias DataSpace
        accesses = detect_accesses(expression)
        parts = {
            k: IntervalGroup(build_intervals(v)).add(ispace.intervals).relaxed
            for k, v in accesses.items() if k
        }
        dspace = DataSpace(meta.dintervals, parts)

        # Drop or weaken parallelism if necessary
        properties = dict(meta.properties)
        for d, v in meta.properties.items():
            if any(i.is_Modulo for i in ispace.sub_iterators[d]):
                properties[d] = normalize_properties(v, {SEQUENTIAL})
            elif d not in writeto.dimensions:
                properties[d] = normalize_properties(
                    v, {PARALLEL_IF_PVT}) - {ROUNDABLE}

        # Finally, build the alias Cluster
        clusters.append(
            Cluster(expression, ispace, dspace, meta.guards, properties))

    return clusters, subs
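
The halo and indexing arithmetic in the `writeto` branch follows one rule: an Interval spanning `[lower, upper]` around a Dimension needs `abs(lower) + abs(upper)` extra points, and writes are shifted by `-lower` so that the smallest access lands on index 0. A self-contained sketch, with a hypothetical namedtuple standing in for Devito's Interval:

from collections import namedtuple
from sympy import Symbol

Interval = namedtuple('Interval', 'dim lower upper')

x, z = Symbol('x'), Symbol('z')
writeto = [Interval(x, -2, 2), Interval(z, 0, 1)]

# The halo mirrors the write-to radius on either side of each dimension...
halo = [(abs(i.lower), abs(i.upper)) for i in writeto]  # [(2, 2), (0, 1)]

# ...while writes are shifted by -lower so the smallest access lands on 0
indices = [i.dim - i.lower for i in writeto]  # [x + 2, z]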
Example #5
    def callback(self, clusters, prefix, cache=None):
        cache = {} if cache is None else cache

        # Locate all Function accesses within the provided `clusters`
        accessmap = AccessMapper(clusters)

        # Create the buffers
        buffers = BufferBatch()
        for f, accessv in accessmap.items():
            # Has a buffer already been produced for `f`?
            if f in cache:
                continue

            # Is `f` really a buffering candidate?
            dims = self.callback0(f)
            if dims is None:
                continue
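            # Buffering is only applicable if every candidate Dimension is
            # spanned by the current iteration prefix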
            if not all(any(i.dim in d._defines for i in prefix) for d in dims):
                continue

            cache[f] = buffers.make(f, dims, accessv, self.options, self.sregistry)

        if not buffers:
            return clusters

        try:
            pd = prefix[-2].dim
        except IndexError:
            pd = None

        # Create Eqs to initialize buffers. Note: a buffer only needs to be
        # initialized if the buffered Function is read somewhere or, in the case
        # of non-uniform SubDimensions, to avoid uninitialized values being
        # copied back into the buffered Function
        noinit = self.options['buf-noinit']
        processed = []
        for b in buffers:
            if b.size == 1 and noinit:
                # Special case: avoid initialization if not strictly necessary
                # See docstring for more info about what this implies
                continue

            if b.is_read or not b.has_uniform_subdims:
                dims = b.function.dimensions
                lhs = b.indexed[[b.initmap.get(d, Map(d, d)).b for d in dims]]
                rhs = b.function[[b.initmap.get(d, Map(d, d)).f for d in dims]]

                expr = lower_exprs(Eq(lhs, rhs))
                ispace = b.writeto
                guards = {pd: GuardBound(d.root.symbolic_min, d.root.symbolic_max)
                          for d in b.contraction_mapper}
                properties = {d: {PARALLEL} for d in ispace.itdimensions}

                processed.append(
                    Cluster(expr, ispace, guards=guards, properties=properties)
                )

        # Substitution rules to replace buffered Functions with buffers
        subs = {}
        for b in buffers:
            for a in b.accessv.accesses:
                subs[a] = b.indexed[[b.index_mapper_flat.get(i, i) for i in a.indices]]

        for c in clusters:
            # If a buffer is read but never written, then we need to add
            # an Eq to step through the next slot
            # E.g., `ub[0, x] = u[time+2, x]`
            for b in buffers:
                if not b.is_readonly:
                    continue
                try:
                    c.exprs.index(b.firstread)
                except ValueError:
                    continue

                dims = b.function.dimensions
                lhs = b.indexed[[b.lastmap.get(d, Map(d, d)).b for d in dims]]
                rhs = b.function[[b.lastmap.get(d, Map(d, d)).f for d in dims]]

                expr = lower_exprs(uxreplace(Eq(lhs, rhs), b.subdims_mapper))
                ispace = b.written

                # Buffering creates a storage-related dependence along the
                # contracted dimensions
                properties = dict(c.properties)
                for d in b.contraction_mapper:
                    d = ispace[d].dim  # E.g., `time_sub -> time`
                    properties[d] = normalize_properties(properties[d], {SEQUENTIAL})

                processed.append(
                    c.rebuild(exprs=expr, ispace=ispace, properties=properties)
                )

            # Substitute buffered Functions with the newly created buffers
            exprs = [uxreplace(e, subs) for e in c.exprs]
            ispace = c.ispace
            for b in buffers:
                ispace = ispace.augment(b.sub_iterators)
            processed.append(c.rebuild(exprs=exprs, ispace=ispace))

            # Also append the copy-back if `c` contains the last-write of some buffers
            # E.g., `u[time + 1, x] = ub[sb1, x]`
            for b in buffers:
                if b.is_readonly:
                    continue
                try:
                    c.exprs.index(b.lastwrite)
                except ValueError:
                    continue

                dims = b.function.dimensions
                lhs = b.function[[b.lastmap.get(d, Map(d, d)).f for d in dims]]
                rhs = b.indexed[[b.lastmap.get(d, Map(d, d)).b for d in dims]]

                expr = lower_exprs(uxreplace(Eq(lhs, rhs), b.subdims_mapper))
                ispace = b.written

                # Buffering creates a storage-related dependence along the
                # contracted dimensions
                properties = dict(c.properties)
                for d in b.contraction_mapper:
                    d = ispace[d].dim  # E.g., `time_sub -> time`
                    properties[d] = normalize_properties(properties[d], {SEQUENTIAL})

                processed.append(
                    c.rebuild(exprs=expr, ispace=ispace, properties=properties)
                )

        return processed
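
For intuition, the net effect of the pass -- buffer initialization, modulo-indexed computation within the buffer, and copy-back into the buffered Function -- can be rendered in plain NumPy. The toy below is a hypothetical double-buffering of a trivial update; the pass above constructs the equivalent Eqs and Clusters symbolically:

import numpy as np

nt, nx = 8, 5
u = np.zeros((nt, nx))
u[0] = np.arange(nx)

ub = np.zeros((2, nx))  # the buffer: 2 slots in place of the full history
ub[0] = u[0]            # initialization (the is_read case above)

for time in range(nt - 1):
    ub[(time + 1) % 2] = ub[time % 2] + 1  # compute within the buffer
    u[time + 1] = ub[(time + 1) % 2]       # copy-back (the last-write case)

assert np.allclose(u[-1], u[0] + nt - 1)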