Esempio n. 1
0
    def written(self):
        """
        The `written` IterationSpace, that is the iteration space that must be
        iterated over in order to read all of the written buffer values.

        One Interval is gathered for each Dimension of ``self.function``
        (after applying ``self.subdims_mapper``). The resulting IntervalGroup
        carries a single relation listing the Dimensions in gathering order.
        """
        gathered = []
        iterators = {}
        dirs = {}
        for fdim in self.function.dimensions:
            dim = fdim.xreplace(self.subdims_mapper)
            try:
                entry = self.itintervals_mapper[dim]
            except KeyError:
                # E.g., dim=time_sub; retry with the root Dimension
                assert dim.is_NonlinearDerived
                dim = dim.root
                entry = self.itintervals_mapper[dim]
            interval, si, direction = entry

            gathered.append(interval)
            iterators[dim] = si + as_tuple(self.sub_iterators[dim])
            dirs[dim] = direction

        ordering = tuple(itv.dim for itv in gathered)
        group = IntervalGroup(gathered, relations=(ordering,))

        return IterationSpace(group, iterators, dirs)
Esempio n. 2
0
    def writeto(self):
        """
        The `writeto` IterationSpace, that is the iteration space that must be
        iterated over in order to initialize the buffer.

        One Interval is gathered for each Dimension of ``self.buffer``; the
        buffer Dimensions themselves are used as the only ordering relation.
        """
        gathered = []
        iterators = {}
        dirs = {}
        for dim in self.buffer.dimensions:
            try:
                entry = self.itintervals_mapper[dim]
            except KeyError:
                # E.g., the contraction Dimension `db0`, for which a
                # degenerate Forward interval is used
                assert dim in self.contraction_mapper.values()
                entry = (Interval(dim, 0, 0), (), Forward)
            interval, si, direction = entry

            gathered.append(interval)
            iterators[dim] = si
            dirs[dim] = direction

        group = IntervalGroup(gathered, relations=(self.buffer.dimensions,))

        return IterationSpace(group, iterators, dirs)
Esempio n. 3
0
    def _eliminate_inter_stencil_redundancies(self, cluster, template,
                                              **kwargs):
        """
        Detect aliasing expressions in ``cluster`` and capture them into
        vector temporaries, each computed by a new Cluster.

        ``template`` is called with no arguments to obtain the name of each
        new temporary. The return value is the list of alias Clusters
        followed by ``cluster`` rebuilt with the remaining expressions.

        Examples
        --------
        1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
           >>>
           ti[x,y,z] = a[x,y,z] + b[x,y,z]
           temp = ti[x,y,z]*c[t,x,y,z]

        2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
           temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
           >>>
           ti[x,y,z] = a[x,y,z]*b[x,y,z]
           temp1 = 2.0*ti[x,y,z]
           temp2 = 3.0*ti[x,y,z+1]
        """
        # See collect.__doc__ for what an "alias" is
        aliases = collect(cluster.exprs)

        # Redundancies will be stored in space-varying temporaries
        graph = FlowGraph(cluster.exprs)
        time_invariants = {}
        for node in graph.values():
            time_invariants[node.rhs] = graph.time_invariant(node)

        # Split the expressions into candidates (to be captured) and the
        # ones that are kept as they are
        processed = []
        candidates = OrderedDict()
        for lhs, node in graph.items():
            # Cost check (to keep the memory footprint under control)
            naliases = len(aliases.get(node.rhs))
            cost = estimate_cost(node, True) * naliases
            worth_capturing = (
                (cost >= self.MIN_COST_ALIAS and naliases > 1) or
                (cost >= self.MIN_COST_ALIAS_INV and time_invariants[node.rhs])
            )
            if worth_capturing:
                candidates[node.rhs] = lhs
            else:
                processed.append(node)

        # One Cluster per captured alias, plus the substitution rules
        # introducing the new temporaries
        alias_clusters = []
        subs = {}
        for origin, alias in aliases.items():
            if not any(e in candidates for e in alias.aliased):
                continue

            # The write-to Intervals (time Dimensions are left out)
            space_intervals = []
            for itv in cluster.ispace.intervals:
                if itv.dim.is_Time:
                    continue
                diameter = alias.relaxed_diameter.get(itv.dim, (0, 0))
                space_intervals.append(Interval(itv.dim, *diameter))
            writeto = IntervalGroup(space_intervals)

            # Optimization: no need to retain a SpaceDimension if it does not
            # induce a flow/anti dependence (`offsets` tells how much halo
            # will be needed to honour such dependences)
            dep_inducing = [itv for itv in writeto if any(itv.offsets)]
            try:
                first = writeto.index(dep_inducing[0])
                writeto = IntervalGroup(writeto[first:])
            except IndexError:
                warning("Couldn't optimize some of the detected redundancies")

            # The temporary storing `alias`
            dimensions = [dim.root for dim in writeto.dimensions]
            halo = [(abs(itv.lower), abs(itv.upper)) for itv in writeto]
            array = Array(name=template(),
                          dimensions=dimensions,
                          halo=halo,
                          dtype=cluster.dtype)

            # The expression evaluating `alias`
            lhs_access = tuple(itv.dim - itv.lower for itv in writeto)
            expression = Eq(array[lhs_access], origin)

            # The substitution rules replacing the aliasing expressions
            # with accesses to the temporary
            for aliased, distance in alias.with_distance:
                assert all(itv.dim in distance.labels for itv in writeto)
                shifted = [itv.dim - itv.lower + distance[itv.dim]
                           for itv in writeto]
                replacement = array[shifted]
                if aliased in candidates:
                    # It would *not* be in `candidates` if part of a
                    # composite alias
                    subs[candidates[aliased]] = replacement
                subs[aliased] = replacement

            # The `alias` IterationSpace
            intervals, sub_iterators, directions = cluster.ispace.args
            ispace = IterationSpace(intervals.add(writeto), sub_iterators,
                                    directions)

            # The `alias` DataSpace
            accesses = detect_accesses(expression)
            parts = {}
            for function, stencil in accesses.items():
                if not function:
                    continue
                extent = IntervalGroup(build_intervals(stencil))
                parts[function] = extent.add(ispace.intervals)
            dspace = DataSpace(cluster.dspace.intervals, parts)

            # The new Cluster computing `alias`
            alias_clusters.append(Cluster([expression], ispace, dspace))

        # Switch temporaries in the expression trees
        rebuilt = cluster.rebuild([e.xreplace(subs) for e in processed])

        return alias_clusters + [rebuilt]
Esempio n. 4
0
def _optimize_schedule_rotations(schedule, sregistry):
    """
    Transform the schedule such that the tensor temporaries "rotate" along
    the outermost Dimension. This trades a parallel Dimension for a smaller
    working set size.

    Consecutive entries of ``schedule`` sharing the same write-to space are
    processed together. Groups whose outermost write-to Dimension is not in
    ``schedule.dmapper`` are forwarded unchanged. ``sregistry`` provides the
    names of the newly created ModuloDimensions.
    """
    # The rotations Dimension is the outermost
    ridx = 0

    rmapper = defaultdict(list)
    processed = []
    for wspace, members in groupby(schedule, key=lambda i: i.writeto):
        members = list(members)

        candidate = wspace[ridx]
        d = candidate.dim
        try:
            ds = schedule.dmapper[d]
        except KeyError:
            # Can't do anything if `d` isn't an IncrDimension over a block
            processed.extend(members)
            continue

        n = candidate.min_size
        assert n > 0

        iis = candidate.lower
        iib = candidate.upper

        ii = ModuloDimension('%sii' % d, ds, iis, incr=iib)
        cd = CustomDimension(name='%s%s' % (d, d),
                             symbolic_min=ii,
                             symbolic_max=iib,
                             symbolic_size=n)
        dsi = ModuloDimension('%si' % ds, cd, cd + ds - iis, n)

        # One ModuloDimension per distinct index along the rotations
        # Dimension, shared by all members of the group
        mapper = OrderedDict()
        for member in members:
            # Update `indicess` to use `xs0`, `xs1`, ...
            mds = []
            for indices in member.indicess:
                v = indices[ridx]
                if v not in mapper:
                    name = sregistry.make_name(prefix='%sr' % d.name)
                    mapper[v] = ModuloDimension(name, ds, v, n)
                mds.append(mapper[v])

            indicess = []
            for md, indices in zip(mds, member.indicess):
                indicess.append(indices[:ridx] + [md] + indices[ridx + 1:])

            # Update `writeto` by switching `d` to `dsi`
            switched = wspace.intervals.switch(d, dsi).zero(dsi)
            sub_iterators = dict(wspace.sub_iterators)
            sub_iterators[d] = dsi
            writeto = IterationSpace(switched, sub_iterators)

            # Transform `alias` by adding `cd`
            alias = member.alias.xreplace({d: d + cd})

            # Extend `ispace` to iterate over rotations
            # Note: we're by construction in-bounds here
            d1 = writeto[ridx + 1].dim
            rotations = IntervalGroup(Interval(cd, 0, 0),
                                      relations={(d, cd, d1)})
            rispace = IterationSpace(rotations, {cd: dsi}, {cd: Forward})
            aispace = member.ispace.zero(d).augment({d: mds + [ii]})
            ispace = IterationSpace.union(rispace, aispace)

            processed.append(ScheduledAlias(alias, writeto, ispace,
                                           member.aliaseds, indicess))

        # Update the rotations mapper
        rmapper[d].extend(mapper.values())

    return Schedule(*processed, dmapper=schedule.dmapper, rmapper=rmapper)
Esempio n. 5
0
def lower_aliases(cluster, aliases, in_writeto, maxpar):
    """
    Create a Schedule from an AliasMapper.

    Parameters
    ----------
    cluster
        The Cluster the aliases were extracted from; its iteration space,
        sub-iterators and directions seed each alias' iteration space.
    aliases
        Mapping from each alias to an object exposing `intervals`,
        `distances` and `aliaseds`.
    in_writeto
        Callable invoked as `in_writeto(dim, cluster)`; a truthy result
        forces `dim` into the write-to region of the alias.
    maxpar
        Converted with `int()` and added to the stamp of every write-to
        Interval.

    Returns
    -------
    A Schedule of ScheduledAliases, sorted via `cit`, carrying the mapper of
    the IncrDimensions created here (`dmapper`).
    """
    # Maps block IncrDimensions to the IncrDimensions created below; shared
    # across all aliases so that each one is created only once
    dmapper = {}
    processed = []
    for alias, v in aliases.items():
        # Interval lookup by Dimension; NonlinearDerived Dimensions are also
        # reachable through their parent
        imapper = {
            **{i.dim: i
               for i in v.intervals},
            **{
                i.dim.parent: i
                for i in v.intervals if i.dim.is_NonlinearDerived
            }
        }

        intervals = []
        writeto = []
        sub_iterators = {}
        # One list of indices per aliased expression, filled Dimension by
        # Dimension in the loop below
        indicess = [[] for _ in v.distances]
        for i in cluster.ispace.intervals:
            try:
                interval = imapper[i.dim]
            except KeyError:
                # E.g., `x0_blk0` or (`a[y_m+1]` => `y not in imapper`)
                intervals.append(i)
                continue

            assert i.stamp >= interval.stamp

            # Once a Dimension has entered `writeto`, all the following ones
            # do too (`writeto` is truthy from then on)
            if not (writeto or interval != interval.zero()
                    or in_writeto(i.dim, cluster)):
                # The alias doesn't require a temporary Dimension along i.dim
                intervals.append(i)
                continue

            assert not i.dim.is_NonlinearDerived

            # `i.dim` is necessarily part of the write-to region, so
            # we have to adjust the Interval's stamp. For example, consider
            # `i=x[0,0]<1>` and `interval=x[-4,4]<0>`; here we need to
            # use `<1>` as stamp, which is what appears in `cluster`
            interval = interval.lift(i.stamp)

            # We further bump the interval stamp if we were requested to trade
            # fusion for more collapse-parallelism
            interval = interval.lift(interval.stamp + int(maxpar))

            writeto.append(interval)
            intervals.append(interval)

            if i.dim.is_Incr:
                # Suitable IncrDimensions must be used to avoid OOB accesses.
                # E.g., r[xs][ys][z] => both `xs` and `ys` must be initialized such
                # that all accesses are within bounds. This requires traversing the
                # hierarchy of IncrDimensions to set `xs` (`ys`) in a way that
                # consecutive blocks access consecutive regions in `r` (e.g.,
                # `xs=x0_blk1-x0_blk0` with `blocklevels=2`; `xs=0` with
                # `blocklevels=1`, that is it degenerates in this case)
                try:
                    d = dmapper[i.dim]
                except KeyError:
                    dd = i.dim.parent
                    assert dd.is_Incr
                    if dd.parent.is_Incr:
                        # An IncrDimension in between IncrDimensions
                        m = i.dim.symbolic_min - i.dim.parent.symbolic_min
                    else:
                        m = 0
                    # Cache the new IncrDimension for the following aliases
                    d = dmapper[i.dim] = IncrDimension("%ss" % i.dim.name,
                                                       i.dim, m,
                                                       dd.symbolic_size, 1,
                                                       dd.step)
                sub_iterators[i.dim] = d
            else:
                d = i.dim

            # Given the iteration `interval`, lower distances to indices
            for distance, indices in zip(v.distances, indicess):
                indices.append(d - interval.lower + distance[interval.dim])

        # The alias write-to space
        writeto = IterationSpace(IntervalGroup(writeto), sub_iterators)

        # The alias iteration space
        intervals = IntervalGroup(intervals, cluster.ispace.relations)
        ispace = IterationSpace(intervals, cluster.sub_iterators,
                                cluster.directions)
        ispace = ispace.augment(sub_iterators)

        processed.append(
            ScheduledAlias(alias, writeto, ispace, v.aliaseds, indicess))

    # The [ScheduledAliases] must be ordered so as to reuse as many of the
    # `cluster`'s IterationIntervals as possible in order to honor the
    # write-to region. Another fundamental reason for ordering is to ensure
    # deterministic code generation
    processed = sorted(processed, key=lambda i: cit(cluster.ispace, i.ispace))

    return Schedule(*processed, dmapper=dmapper)
Esempio n. 6
0
    def _eliminate_inter_stencil_redundancies(self, cluster, template,
                                              **kwargs):
        """
        Search for redundancies across the expressions and expose them
        to the later stages of the optimisation pipeline by introducing
        new temporaries of suitable rank.

        Two type of redundancies are sought:

            * Time-invariants, and
            * Across different space points

        ``template`` is called with no arguments to obtain the name of each
        new temporary. If ``cluster`` is sparse it is returned untouched;
        otherwise the alias Clusters are returned, followed by ``cluster``
        rebuilt with the non-captured expressions.

        Examples
        ========
        Let ``t`` be the time dimension, ``x, y, z`` the space dimensions. Then:

        1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
           >>>
           ti[x,y,z] = a[x,y,z] + b[x,y,z]
           temp = ti[x,y,z]*c[t,x,y,z]

        2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
           temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
           >>>
           ti[x,y,z] = a[x,y,z]*b[x,y,z]
           temp1 = 2.0*ti[x,y,z]
           temp2 = 3.0*ti[x,y,z+1]
        """
        if cluster.is_sparse:
            return cluster

        # For more information about "aliases", refer to collect.__doc__
        mapper, aliases = collect(cluster.exprs)

        # Redundancies will be stored in space-varying temporaries
        g = cluster.trace
        indices = g.space_indices
        time_invariants = {v.rhs: g.time_invariant(v) for v in g.values()}

        # Find the candidate expressions
        processed = []
        candidates = OrderedDict()
        for k, v in g.items():
            # Cost check (to keep the memory footprint under control)
            naliases = len(mapper.get(v.rhs, []))
            cost = estimate_cost(v, True) * naliases
            if cost >= self.thresholds['min-cost-alias'] and\
                    (naliases > 1 or time_invariants[v.rhs]):
                candidates[v.rhs] = k
            else:
                processed.append(v)

        # Create alias Clusters and all necessary substitution rules
        # for the new temporaries
        alias_clusters = ClusterGroup()
        rules = OrderedDict()
        for origin, alias in aliases.items():
            if all(i not in candidates for i in alias.aliased):
                continue
            # Construct an iteration space suitable for /alias/
            _, sub_iterators, directions = cluster.ispace.args
            intervals = [
                Interval(i.dim, *alias.relaxed_diameter.get(i.dim, i.limits))
                for i in cluster.ispace.intervals
            ]
            ispace = IterationSpace(intervals, sub_iterators, directions)

            # Optimization: perhaps we can lift the cluster outside the time dimension
            if all(time_invariants[i] for i in alias.aliased):
                ispace = ispace.project(lambda i: not i.is_Time)

            # Build a symbolic function for /alias/
            intervals = ispace.intervals
            halo = [(abs(intervals[i].lower), abs(intervals[i].upper))
                    for i in indices]
            function = Array(name=template(), dimensions=indices, halo=halo)
            access = tuple(i - intervals[i].lower for i in indices)
            expression = Eq(Indexed(function.indexed, *access), origin)

            # Construct a data space suitable for /alias/
            # (a dedicated name is used so that the `mapper` returned by
            # `collect` is not shadowed)
            accesses = detect_accesses(expression)
            parts = {
                f: IntervalGroup(build_intervals(stencil)).add(intervals)
                for f, stencil in accesses.items() if f
            }
            dspace = DataSpace([i.zero() for i in intervals], parts)

            # Create a new Cluster for /alias/
            alias_clusters.append(Cluster([expression], ispace, dspace))

            # Add substitution rules
            for aliased, distance in alias.with_distance:
                access = [
                    i - intervals[i].lower + j for i, j in distance
                    if i in indices
                ]
                temporary = Indexed(function.indexed, *tuple(access))
                if aliased in candidates:
                    # Only some of the aliased expressions may be candidates
                    # (the check at the top of the loop requires just one),
                    # so an unconditional lookup could raise KeyError
                    rules[candidates[aliased]] = temporary
                rules[aliased] = temporary

        # Group clusters together if possible
        alias_clusters = groupby(alias_clusters).finalize()
        alias_clusters.sort(key=lambda i: i.is_dense)

        # Switch temporaries in the expression trees
        processed = [e.xreplace(rules) for e in processed]

        return alias_clusters + [cluster.rebuild(processed)]
Esempio n. 7
0
    def _eliminate_inter_stencil_redundancies(self, cluster, template, **kwargs):
        """
        Search for redundancies across the expressions and expose them
        to the later stages of the optimisation pipeline by introducing
        new temporaries of suitable rank.

        Two type of redundancies are sought:

            * Time-invariants, and
            * Across different space points

        ``template`` is called with no arguments to obtain the name of each
        new temporary. If ``cluster`` is sparse it is returned untouched;
        otherwise the alias Clusters are returned, followed by ``cluster``
        rebuilt with the non-captured expressions.

        Examples
        ========
        Let ``t`` be the time dimension, ``x, y, z`` the space dimensions. Then:

        1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
           >>>
           ti[x,y,z] = a[x,y,z] + b[x,y,z]
           temp = ti[x,y,z]*c[t,x,y,z]

        2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
           temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
           >>>
           ti[x,y,z] = a[x,y,z]*b[x,y,z]
           temp1 = 2.0*ti[x,y,z]
           temp2 = 3.0*ti[x,y,z+1]
        """
        if cluster.is_sparse:
            return cluster

        # For more information about "aliases", refer to collect.__doc__
        mapper, aliases = collect(cluster.exprs)

        # Redundancies will be stored in space-varying temporaries
        g = cluster.trace
        indices = g.space_indices
        time_invariants = {v.rhs: g.time_invariant(v) for v in g.values()}

        # Find the candidate expressions
        processed = []
        candidates = OrderedDict()
        for k, v in g.items():
            # Cost check (to keep the memory footprint under control)
            naliases = len(mapper.get(v.rhs, []))
            cost = estimate_cost(v, True)*naliases
            if cost >= self.MIN_COST_ALIAS and (naliases > 1 or time_invariants[v.rhs]):
                candidates[v.rhs] = k
            else:
                processed.append(v)

        # Create alias Clusters and all necessary substitution rules
        # for the new temporaries
        alias_clusters = ClusterGroup()
        rules = OrderedDict()
        for origin, alias in aliases.items():
            if all(i not in candidates for i in alias.aliased):
                continue
            # Construct an iteration space suitable for /alias/
            _, sub_iterators, directions = cluster.ispace.args
            intervals = [Interval(i.dim, *alias.relaxed_diameter.get(i.dim, i.limits))
                         for i in cluster.ispace.intervals]
            ispace = IterationSpace(intervals, sub_iterators, directions)

            # Optimization: perhaps we can lift the cluster outside the time dimension
            if all(time_invariants[i] for i in alias.aliased):
                ispace = ispace.project(lambda i: not i.is_Time)

            # Build a symbolic function for /alias/
            intervals = ispace.intervals
            halo = [(abs(intervals[i].lower), abs(intervals[i].upper)) for i in indices]
            function = Array(name=template(), dimensions=indices, halo=halo)
            access = tuple(i - intervals[i].lower for i in indices)
            expression = Eq(function[access], origin)

            # Construct a data space suitable for /alias/
            # (a dedicated name is used so that the `mapper` returned by
            # `collect` is not shadowed)
            accesses = detect_accesses(expression)
            parts = {f: IntervalGroup(build_intervals(stencil)).add(intervals)
                     for f, stencil in accesses.items() if f}
            dspace = DataSpace([i.zero() for i in intervals], parts)

            # Create a new Cluster for /alias/
            alias_clusters.append(Cluster([expression], ispace, dspace))

            # Add substitution rules
            for aliased, distance in alias.with_distance:
                access = [i - intervals[i].lower + j for i, j in distance if i in indices]
                temporary = function[access]
                if aliased in candidates:
                    # Only some of the aliased expressions may be candidates
                    # (the check at the top of the loop requires just one),
                    # so an unconditional lookup could raise KeyError
                    rules[candidates[aliased]] = temporary
                rules[aliased] = temporary

        # Group clusters together if possible
        alias_clusters = groupby(alias_clusters).finalize()
        alias_clusters.sort(key=lambda i: i.is_dense)

        # Switch temporaries in the expression trees
        processed = [e.xreplace(rules) for e in processed]

        return alias_clusters + [cluster.rebuild(processed)]
Esempio n. 8
0
    def iter(self, cluster, max_par):
        """
        The aliases can legally be scheduled in many different orders, but we
        privilege the one that minimizes storage while maximizing fusion.

        This is a generator yielding one tuple
        ``(alias, writeto, ispace, aliaseds, distances)`` per entry of
        ``self``, shortest write-to region first. Entries with write-to
        regions of equal length are yielded in the iteration order of
        ``self``.

        Parameters
        ----------
        cluster
            The Cluster providing the reference iteration space, relations,
            sub-iterators, directions and properties.
        max_par
            If truthy, a PARALLEL Dimension is turned into a write-to
            Dimension even if the alias has zero distance along it.
        """
        items = []
        for alias, (intervals, aliaseds, distances) in self.items():
            mapper = {i.dim: i for i in intervals}
            mapper.update({
                i.dim.parent: i
                for i in intervals if i.dim.is_NonlinearDerived
            })

            # Becomes True as soon as a Dimension in `ispace` is found to
            # be independent of `intervals`
            flag = False
            iteron = []
            writeto = []
            for i in cluster.ispace.intervals:
                try:
                    interval = mapper[i.dim]
                except KeyError:
                    if not any(i.dim in d._defines for d in mapper):
                        # E.g., `t[0,0]<0>` in the case of t-invariant aliases,
                        # whereas if `i.dim` is `x0_blk0` in `x0_blk0[0,0]<0>` then
                        # we would not enter here
                        flag = True

                    iteron.append(i)
                    continue

                assert i.stamp >= interval.stamp

                # Does `i.dim` actually need to be a write-to Dimension ?
                if flag or interval != interval.zero():
                    # Yes, so we also have to adjust the Interval's stamp.
                    # E.g., `i=x[0,0]<1>` and `interval=x[-4,4]<0>`. We need to
                    # use `<1>` which is the actual stamp used in `cluster`
                    interval = interval.lift(i.stamp)
                    iteron.append(interval)
                    writeto.append(interval)
                    flag = True
                elif max_par and PARALLEL in cluster.properties[i.dim]:
                    # Not necessarily, but with `max_par` the user is
                    # expressing the wish to trade-off storage for parallelism
                    interval = interval.lift(i.stamp + 1)
                    iteron.append(interval)
                    writeto.append(interval)
                    flag = True
                else:
                    iteron.append(i)

            if writeto:
                writeto = IntervalGroup(writeto, cluster.ispace.relations)
            else:
                # E.g., an `alias` having 0-distance along all Dimensions
                writeto = IntervalGroup(intervals, cluster.ispace.relations)

            # Construct the IterationSpace within which the alias will be computed
            ispace = IterationSpace(
                IntervalGroup(iteron, cluster.ispace.relations),
                cluster.sub_iterators, cluster.directions)
            ispace = ispace.augment(self.index_mapper)

            items.append((alias, writeto, ispace, aliaseds, distances))

        # Shortest write-to region first. `sorted` is stable, so this gives
        # the same order as repeatedly extracting the first minimum, without
        # the quadratic scan and without the element-wise `==` comparisons
        # of the tuples that `list.remove` performs
        for item in sorted(items, key=lambda i: len(i[1])):
            yield item
Esempio n. 9
0
def make_schedule(cluster, aliases, in_writeto, options):
    """
    Create a Schedule from an AliasMapper.

    For each alias, the Dimensions of `cluster`'s iteration space are split
    into those the alias merely iterates over and those forming its write-to
    region. ``in_writeto(dim, cluster)`` may force a Dimension into the
    write-to region; ``options['cire-maxpar']`` is added (as an int) to the
    stamp of each write-to Interval.
    """
    max_par = options['cire-maxpar']

    dmapper = {}
    processed = []
    for alias, v in aliases.items():
        # Interval lookup by Dimension; NonlinearDerived Dimensions are also
        # reachable through their parent
        imapper = {itv.dim: itv for itv in v.intervals}
        imapper.update({itv.dim.parent: itv for itv in v.intervals
                        if itv.dim.is_NonlinearDerived})

        intervals = []
        writeto = []
        sub_iterators = {}
        indicess = [[] for _ in v.distances]
        for i in cluster.ispace.intervals:
            if i.dim not in imapper:
                # E.g., `x0_blk0` or (`a[y_m+1]` => `y not in imapper`)
                intervals.append(i)
                continue
            interval = imapper[i.dim]

            assert i.stamp >= interval.stamp

            needs_temporary = (writeto or interval != interval.zero() or
                               in_writeto(i.dim, cluster))
            if not needs_temporary:
                # The alias doesn't require a temporary Dimension along i.dim
                intervals.append(i)
                continue

            assert not i.dim.is_NonlinearDerived

            # `i.dim` is necessarily part of the write-to region, so
            # we have to adjust the Interval's stamp. For example, consider
            # `i=x[0,0]<1>` and `interval=x[-4,4]<0>`; here we need to
            # use `<1>` as stamp, which is what appears in `cluster`
            lifted = interval.lift(i.stamp)

            # We further bump the interval stamp if we were requested to trade
            # fusion for more collapse-parallelism
            interval = lifted.lift(lifted.stamp + int(max_par))

            writeto.append(interval)
            intervals.append(interval)

            if i.dim.is_Incr:
                # Suitable ShiftedDimensions must be used to avoid OOB accesses.
                # E.g., r[xs][ys][z] => both `xs` and `ys` must start at 0,
                # not at `x0_blk0`
                if i.dim not in dmapper:
                    dmapper[i.dim] = ShiftedDimension(i.dim,
                                                      name="%ss" % i.dim.name)
                d = dmapper[i.dim]
                sub_iterators[i.dim] = d
            else:
                d = i.dim

            # Given the iteration `interval`, lower distances to indices
            for distance, indices in zip(v.distances, indicess):
                indices.append(d - interval.lower + distance[interval.dim])

        # The alias write-to space
        wspace = IterationSpace(IntervalGroup(writeto), sub_iterators)

        # The alias iteration space
        group = IntervalGroup(intervals, cluster.ispace.relations)
        ispace = IterationSpace(group, cluster.sub_iterators,
                                cluster.directions).augment(sub_iterators)

        processed.append(ScheduledAlias(alias, wspace, ispace, v.aliaseds,
                                       indicess))

    # Sort by write-to region for deterministic code generation
    processed.sort(key=lambda i: i.writeto)

    return Schedule(*processed, dmapper=dmapper)
Esempio n. 10
0
def process(candidates, aliases, cluster, template):
    """
    Create Clusters from aliasing expressions.

    Returns a pair ``(clusters, subs)``: the list of new Clusters, one per
    captured alias, and the substitution rules mapping the aliasing
    expressions (and their candidate keys) to accesses to the new
    temporaries. ``template`` is called with no arguments to name each
    temporary.
    """
    clusters = []
    subs = {}
    for origin, alias in aliases.items():
        if not any(e in candidates for e in alias.aliased):
            continue

        # The write-to Intervals (time Dimensions are left out)
        space_intervals = []
        for itv in cluster.ispace.intervals:
            if itv.dim.is_Time:
                continue
            diameter = alias.relaxed_diameter.get(itv.dim, (0, 0))
            space_intervals.append(Interval(itv.dim, *diameter))
        writeto = IntervalGroup(space_intervals)

        # Optimization: no need to retain a SpaceDimension if it does not
        # induce a flow/anti dependence (`offsets` tells how much halo will
        # be required to honour such dependences)
        dep_inducing = [itv for itv in writeto if any(itv.offsets)]
        try:
            first = writeto.index(dep_inducing[0])
            writeto = IntervalGroup(writeto[first:])
        except IndexError:
            perf_adv("Could not optimize some of the detected redundancies")

        # The temporary storing `alias`
        dimensions = [dim.root for dim in writeto.dimensions]
        halo = [(abs(itv.lower), abs(itv.upper)) for itv in writeto]
        array = Array(name=template(),
                      dimensions=dimensions,
                      halo=halo,
                      dtype=cluster.dtype)

        # The expression evaluating `alias`; the rules created so far are
        # applied to `origin` first
        lhs_access = tuple(itv.dim - itv.lower for itv in writeto)
        expression = Eq(array[lhs_access], origin.xreplace(subs))

        # The substitution rules replacing the aliasing expressions with
        # accesses to the temporary
        for aliased, distance in alias.with_distance:
            assert all(itv.dim in distance.labels for itv in writeto)
            shifted = [itv.dim - itv.lower + distance[itv.dim]
                       for itv in writeto]
            replacement = array[shifted]
            if aliased in candidates:
                # It would *not* be in `candidates` if part of a composite alias
                subs[candidates[aliased]] = replacement
            subs[aliased] = replacement

        # The `alias` IterationSpace
        intervals, sub_iterators, directions = cluster.ispace.args
        ispace = IterationSpace(intervals.add(writeto), sub_iterators,
                                directions)

        # Optimize the `alias` IterationSpace: if possible, the innermost
        # IterationInterval is rounded up to a multiple of the vector length
        # NOTE(review): extending the upper bound by `size % vl` only yields
        # a multiple of `vl` for some sizes -- confirm this is intended
        try:
            innermost = ispace.itintervals[-1]
            if ROUNDABLE in cluster.properties[innermost.dim]:
                from devito.parameters import configuration
                platform = configuration['platform']
                vl = platform.simd_items_per_reg(cluster.dtype)
                padding = innermost.interval.size % vl
                ispace = ispace.add(Interval(innermost.dim, 0, padding))
        except (TypeError, KeyError):
            pass

        # The `alias` DataSpace
        accesses = detect_accesses(expression)
        parts = {}
        for function, stencil in accesses.items():
            if not function:
                continue
            extent = IntervalGroup(build_intervals(stencil))
            parts[function] = extent.add(ispace.intervals)
        dspace = DataSpace(cluster.dspace.intervals, parts)

        # The new Cluster computing `alias`
        rebuilt = cluster.rebuild(exprs=[expression], ispace=ispace,
                                  dspace=dspace)
        clusters.append(rebuilt)

    return clusters, subs
Esempio n. 11
0
    def _eliminate_inter_stencil_redundancies(self, cluster, template,
                                              **kwargs):
        """
        Search for redundancies across the expressions and expose them
        to the later stages of the optimisation pipeline by introducing
        new temporaries of suitable rank.

        Two type of redundancies are sought:

            * Time-invariants, and
            * Across different space points

        ``template`` is called with the position of the alias to obtain the
        name of each new temporary. If ``cluster`` is sparse it is returned
        untouched; otherwise the alias Clusters are returned, followed by
        ``cluster`` rebuilt with the non-captured expressions.

        Examples
        ========
        Let ``t`` be the time dimension, ``x, y, z`` the space dimensions. Then:

        1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
           >>>
           ti[x,y,z] = a[x,y,z] + b[x,y,z]
           temp = ti[x,y,z]*c[t,x,y,z]

        2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
           temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
           >>>
           ti[x,y,z] = a[x,y,z]*b[x,y,z]
           temp1 = 2.0*ti[x,y,z]
           temp2 = 3.0*ti[x,y,z+1]
        """
        if cluster.is_sparse:
            return cluster

        # For more information about "aliases", refer to collect.__doc__
        mapper, aliases = collect(cluster.exprs)

        # Redundancies will be stored in space-varying temporaries
        g = cluster.trace
        indices = g.space_indices
        time_invariants = {v.rhs: g.time_invariant(v) for v in g.values()}

        # Template for captured redundancies
        shape = tuple(i.symbolic_extent for i in indices)

        def make(i):
            return Array(name=template(i), shape=shape,
                         dimensions=indices).indexed

        # Find the candidate expressions
        processed = []
        candidates = OrderedDict()
        for k, v in g.items():
            # Cost check (to keep the memory footprint under control)
            naliases = len(mapper.get(v.rhs, []))
            cost = estimate_cost(v, True) * naliases
            if cost >= self.thresholds['min-cost-alias'] and\
                    (naliases > 1 or time_invariants[v.rhs]):
                candidates[v.rhs] = k
            else:
                processed.append(Eq(k, v.rhs))

        # Create alias Clusters and all necessary substitution rules
        # for the new temporaries
        alias_clusters = ClusterGroup()
        rules = OrderedDict()
        for c, (origin, alias) in enumerate(aliases.items()):
            if all(i not in candidates for i in alias.aliased):
                continue
            function = make(c)
            # Build new Cluster
            expression = Eq(Indexed(function, *indices), origin)
            intervals, sub_iterators, directions = cluster.ispace.args
            # Adjust intervals
            intervals = intervals.subtract(
                alias.anti_stencil.boxify().negate())
            if all(time_invariants[i] for i in alias.aliased):
                intervals = intervals.drop(
                    [i for i in intervals.dimensions if i.is_Time])
            ispace = IterationSpace(intervals, sub_iterators, directions)
            alias_clusters.append(Cluster([expression], ispace))
            # Update substitution rules
            for aliased, distance in alias.with_distance:
                coordinates = [
                    sum([i, j]) for i, j in distance.items() if i in indices
                ]
                temporary = Indexed(function, *tuple(coordinates))
                if aliased in candidates:
                    # Only some of the aliased expressions may be candidates
                    # (the check at the top of the loop requires just one),
                    # so an unconditional lookup could raise KeyError
                    rules[candidates[aliased]] = temporary
                rules[aliased] = temporary
        alias_clusters = groupby(alias_clusters).finalize()
        alias_clusters.sort(key=lambda i: i.is_dense)

        # Switch temporaries in the expression trees
        processed = [e.xreplace(rules) for e in processed]

        return alias_clusters + [cluster.rebuild(processed)]