Example no. 1
        def callback():
            # Derivatives must be evaluated before the introduction of indirect accesses
            try:
                _expr = expr.evaluate
            except AttributeError:
                # E.g., a generic SymPy expression or a number
                _expr = expr

            variables = list(retrieve_function_carriers(_expr))

            # Need to get origin of the field in case it is staggered
            # TODO: handle each variable's staggering separately
            field_offset = variables[0].origin
            # List of indirection indices for all adjacent grid points
            idx_subs, temps = self._interpolation_indices(variables, offset,
                                                          field_offset=field_offset)

            # Substitute coordinate base symbols into the interpolation coefficients
            args = [_expr.xreplace(v_sub) * b.xreplace(v_sub)
                    for b, v_sub in zip(self._interpolation_coeffs, idx_subs)]

            # Accumulate point-wise contributions into a temporary
            rhs = Symbol(name='sum', dtype=self.sfunction.dtype)
            summands = [Eq(rhs, 0., implicit_dims=self.sfunction.dimensions)]
            summands.extend([Inc(rhs, i, implicit_dims=self.sfunction.dimensions)
                            for i in args])

            # Write/Incr `self`
            lhs = self.sfunction.subs(self_subs)
            last = [Inc(lhs, rhs)] if increment else [Eq(lhs, rhs)]

            return temps + summands + last
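
For orientation, here is a minimal usage sketch of the public entry point that ultimately runs a callback like the one above, assuming Devito's documented API (grid shape and point coordinates are illustrative):

# Minimal usage sketch, assuming Devito's public API; values are illustrative.
from devito import Grid, Function, Operator, SparseFunction

grid = Grid(shape=(11, 11))
f = Function(name='f', grid=grid)
f.data[:] = 1.0

sf = SparseFunction(name='sf', grid=grid, npoint=1)
sf.coordinates.data[0, :] = (0.5, 0.5)

# `interpolate` assembles the symbolic expressions via a callback like the above
op = Operator(sf.interpolate(expr=f))
op.apply()
assert abs(sf.data[0] - 1.0) < 1e-6  # interpolating a constant field gives 1.0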
Example no. 2
def _buffering(expressions, callback, options):
    async_degree = options['buf-async-degree']

    # Locate all Function accesses within the provided `expressions`
    accessmap = AccessMapper(expressions)

    # Create the buffers
    buffers = []
    for n, (f, accessv) in enumerate(accessmap.items()):
        dims = callback(f)
        if dims is None:
            # Not a buffer candidate
            continue
        if accessv.lastwrite is None:
            # Read-only Functions cannot be buffering candidates
            continue
        buffers.append(Buffer(f, dims, accessv, n, async_degree))

    # Create Eqs to initialize buffers. Note: a buffer needs to be initialized
    # only if the buffered Function is read at least once or, in the case
    # of non-uniform SubDimensions, to avoid uninitialized values being
    # copied back into the buffered Function
    processed = [Eq(b.indexify(), b.function.subs(b.contraction_mapper))
                 for b in buffers if b.is_read or not b.has_uniform_subdims]

    # Substitution rules to replace buffered Functions with buffers
    subs = {}
    for b in buffers:
        for a in b.accessv.accesses:
            subs[a] = b.indexify(a.indices)

    # Create Eqs to copy back buffers into their buffered Functions
    for e in expressions:
        processed.append(uxreplace(e, subs))

        # We also append the copy-back if `e` is the last-write of some buffers
        for b in buffers:
            if e is b.accessv.lastwrite:
                items = list(zip(e.lhs.indices, b.function.dimensions))
                lhs = b.function[[i if i in b.index_mapper else d for i, d in items]]
                rhs = b.indexed[[b.index_mapper.get(i, d) for i, d in items]]
                if b.subdims_mapper:
                    processed.append(uxreplace(Eq(lhs, rhs), b.subdims_mapper))
                else:
                    processed.append(Eq(lhs, rhs))
                break

    return processed
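
To make the substitution step concrete, here is a toy restatement in plain SymPy; the real pass uses Devito's `uxreplace` and `Buffer` objects, and the modulo-2 indexing below is an assumption matching a size-2 buffer:

# Toy sketch of the access-substitution idea (plain SymPy; illustrative).
from sympy import Eq, IndexedBase, Mod, symbols

time, x = symbols('time x', integer=True)
u = IndexedBase('u')    # the buffered Function
ub = IndexedBase('ub')  # its size-2 buffer along `time`

# Map every access `u[time + k, x]` to `ub[(time + k) % 2, x]`
subs = {u[time + k, x]: ub[Mod(time + k, 2), x] for k in (0, 1)}

expr = Eq(u[time + 1, x], u[time, x] + 1)
buffered = expr.xreplace(subs)  # Eq(ub[Mod(time+1, 2), x], ub[Mod(time, 2), x] + 1)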
Example no. 3
    def _add_implicit(cls, expressions):
        """
        Create and add any associated implicit expressions.

        Implicit expressions are those not explicitly defined by the user
        but instead are requisites of some specified functionality.
        """
        processed = []
        seen = set()
        for e in expressions:
            if e.subdomain:
                try:
                    dims = [d.root for d in e.free_symbols if isinstance(d, Dimension)]
                    sub_dims = [d.root for d in e.subdomain.dimensions]
                    sub_dims.append(e.subdomain.implicit_dimension)
                    dims = [d for d in dims if d not in frozenset(sub_dims)]
                    dims.append(e.subdomain.implicit_dimension)
                    if e.subdomain not in seen:
                        processed.extend([i.func(*i.args, implicit_dims=dims) for i in
                                          e.subdomain._create_implicit_exprs()])
                        seen.add(e.subdomain)
                    dims.extend(e.subdomain.dimensions)
                    new_e = Eq(e.lhs, e.rhs, subdomain=e.subdomain, implicit_dims=dims)
                    processed.append(new_e)
                except AttributeError:
                    processed.append(e)
            else:
                processed.append(e)
        return processed
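
An illustrative input for this method, using Devito's documented SubDomain pattern (class name and thicknesses made up for the example). Note that a plain SubDomain, as below, lacks `implicit_dimension` and thus takes the AttributeError fallback; only a MultiSubDomain-like object reaches the implicit-expression path:

# Illustrative sketch (Devito public API); a plain SubDomain exercises the
# fallback branch of `_add_implicit`, a MultiSubDomain the main path.
from devito import Eq, Function, Grid, Operator, SubDomain

class Middle(SubDomain):
    name = 'middle'
    def define(self, dimensions):
        x, y = dimensions
        # thickness-1 margins along x; full extent along y
        return {x: ('middle', 1, 1), y: y}

mid = Middle()
grid = Grid(shape=(5, 5), subdomains=(mid,))
f = Function(name='f', grid=grid)
op = Operator(Eq(f, f + 1, subdomain=mid))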
Example no. 4
    def _interpolation_indices(self, variables, offset=0, field_offset=0):
        """
        Generate interpolation indices for the DiscreteFunctions in ``variables``.
        """
        index_matrix, points = self.sfunction._index_matrix(offset)

        idx_subs = []
        for i, idx in enumerate(index_matrix):
            # Introduce ConditionalDimension so that we don't go OOB
            mapper = {}
            for j, d in zip(idx, self.grid.dimensions):
                p = points[j]
                lb = sympy.And(p >= d.symbolic_min - self.sfunction._radius,
                               evaluate=False)
                ub = sympy.And(p <= d.symbolic_max + self.sfunction._radius,
                               evaluate=False)
                condition = sympy.And(lb, ub, evaluate=False)
                mapper[d] = ConditionalDimension(p.name,
                                                 self.sfunction._sparse_dim,
                                                 condition=condition,
                                                 indirect=True)

            # Track Indexed substitutions
            idx_subs.append(mapper)

        # Temporaries for the position
        temps = [
            Eq(v, k, implicit_dims=self.sfunction.dimensions)
            for k, v in self.sfunction._position_map.items()
        ]
        # Temporaries for the indirection dimensions
        temps.extend([
            Eq(v,
               k.subs(self.sfunction._position_map),
               implicit_dims=self.sfunction.dimensions)
            for k, v in points.items()
        ])
        # Temporaries for the coefficients
        temps.extend([
            Eq(p,
               c.subs(self.sfunction._position_map),
               implicit_dims=self.sfunction.dimensions)
            for p, c in zip(self.sfunction._point_symbols,
                            self.sfunction._coordinate_bases(field_offset))
        ])

        return idx_subs, temps
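
Restated as a plain predicate, the guard attached to each ConditionalDimension above reads as follows (illustrative only):

# Plain-Python restatement of the OOB guard built above (illustrative).
def in_bounds(p, d_min, d_max, radius):
    # a neighbor index may reach at most `radius` points beyond the domain
    return (d_min - radius) <= p <= (d_max + radius)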
Example no. 5
    def __new__(cls, *args, **kwargs):
        if len(args) == 1:
            input_expr = args[0]
            assert isinstance(input_expr, Eq)
            obj = LoweredEq(input_expr)
        elif len(args) == 2:
            obj = LoweredEq(Eq(*args, evaluate=False))
        else:
            raise ValueError("Cannot construct DummyEq from args=%s" % str(args))
        return ClusterizedEq.__new__(cls, obj, ispace=obj.ispace, dspace=obj.dspace)
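
For illustration, both construction paths accepted by this `__new__` (DummyEq is Devito-internal, so the import path below is a guess):

# Illustrative only; the import path of the internal DummyEq is an assumption.
from devito import Eq, Function, Grid
from devito.ir.equations import DummyEq

grid = Grid(shape=(4,))
f = Function(name='f', grid=grid)

DummyEq(Eq(f, 1))   # single-argument path: wrap an existing Eq
DummyEq(f, f + 1)   # two-argument path: (lhs, rhs)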
Example no. 6
        def callback():
            _expr = indexify(expr)
            _field = indexify(field)

            p, _ = self.obj.gridpoints.indices
            dim_subs = []
            coeffs = []
            for i, d in enumerate(self.obj.grid.dimensions):
                rd = DefaultDimension(name="r%s" % d.name, default_value=self.r)
                dim_subs.append((d, INT(rd + self.obj.gridpoints[p, i])))
                coeffs.append(self.obj.interpolation_coeffs[p, i, rd])
            rhs = prod(coeffs) * _expr
            _field = _field.subs(dim_subs)
            return [Eq(_field, _field + rhs.subs(dim_subs))]
Example no. 7
        def callback():
            _expr = indexify(expr)

            p, _, _ = self.obj.interpolation_coeffs.indices
            dim_subs = []
            coeffs = []
            for i, d in enumerate(self.obj.grid.dimensions):
                rd = DefaultDimension(name="r%s" % d.name, default_value=self.r)
                dim_subs.append((d, INT(rd + self.obj.gridpoints[p, i])))
                coeffs.append(self.obj.interpolation_coeffs[p, i, rd])
            # Apply optional time symbol substitutions to lhs of assignment
            lhs = self.obj.subs(self_subs)
            rhs = prod(coeffs) * _expr.subs(dim_subs)

            return [Eq(lhs, lhs + rhs)]
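
The accumulation performed by `Eq(lhs, lhs + rhs)` over the implicit `p` and `r*` dimensions can be restated as a NumPy reference (illustrative, 2D case):

# NumPy restatement of the precomputed-interpolation semantics (illustrative).
import numpy as np

def precomputed_interpolate(field, gridpoints, coeffs):
    # field: (nx, ny); gridpoints: (npoint, 2) ints; coeffs: (npoint, 2, r)
    npoint, _, r = coeffs.shape
    out = np.zeros(npoint)
    for p in range(npoint):
        for r0 in range(r):
            for r1 in range(r):
                w = coeffs[p, 0, r0] * coeffs[p, 1, r1]  # prod(coeffs)
                out[p] += w * field[gridpoints[p, 0] + r0,
                                    gridpoints[p, 1] + r1]
    return out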
Example no. 8
def generate_implicit_exprs(expressions):
    """
    Create and add implicit expressions.

    Implicit expressions are those not explicitly defined by the user
    but instead are requisites of some specified functionality.

    Currently, implicit expressions stem from the following:

        * MultiSubDomains attached to input equations.
    """
    found = {}
    processed = []
    for e in expressions:
        if e.subdomain:
            try:
                dims = [
                    d.root for d in e.free_symbols if isinstance(d, Dimension)
                ]
                sub_dims = [d.root for d in e.subdomain.dimensions]
                sub_dims.extend(e.subdomain.implicit_dimensions)
                dims = [d for d in dims if d not in frozenset(sub_dims)]
                dims.extend(e.subdomain.implicit_dimensions)
                if e.subdomain not in found:
                    grid = list(retrieve_functions(e, mode='unique'))[0].grid
                    found[e.subdomain] = [
                        i.func(*i.args, implicit_dims=dims)
                        for i in e.subdomain._create_implicit_exprs(grid)
                    ]
                processed.extend(found[e.subdomain])
                dims.extend(e.subdomain.dimensions)
                new_e = Eq(e.lhs,
                           e.rhs,
                           subdomain=e.subdomain,
                           implicit_dims=dims)
                processed.append(new_e)
            except AttributeError:
                # Not a MultiSubDomain
                processed.append(e)
        else:
            processed.append(e)
    return processed
Example no. 9
def process(cluster, chosen, aliases, sregistry, platform):
    clusters = []
    subs = {}
    for alias, writeto, aliaseds, distances in aliases.iter(cluster.ispace):
        if all(i not in chosen for i in aliaseds):
            continue

        # The Dimensions defining the shape of Array
        # Note: with SubDimensions, we may have the following situation:
        #
        # for zi = z_m + zi_ltkn; zi <= z_M - zi_rtkn; ...
        #   r[zi] = ...
        #
        # Instead of `r[zi - z_m - zi_ltkn]` we have just `r[zi]`, so we'll need
        # as much room as in `zi`'s parent to avoid going OOB
        # Aside from producing ugly generated code, shifting the indices would
        # also prevent future passes from transforming the loop bounds
        # (e.g., MPI's comp/comm overlap does that)
        dimensions = [d.parent if d.is_Sub else d for d in writeto.dimensions]

        # The halo of the Array
        halo = [(abs(i.lower), abs(i.upper)) for i in writeto]

        # The data sharing mode of the Array
        sharing = 'local' if any(d.is_Incr for d in writeto.dimensions) else 'shared'

        # Finally create the temporary Array that will store `alias`
        array = Array(name=sregistry.make_name(), dimensions=dimensions, halo=halo,
                      dtype=cluster.dtype, sharing=sharing)

        # The access Dimensions may differ from `writeto.dimensions`. This may
        # happen e.g. if ShiftedDimensions are introduced (`a[x,y]` -> `a[xs,y]`)
        adims = [aliases.index_mapper.get(d, d) for d in writeto.dimensions]

        # The expression computing `alias`
        indices = [d - (0 if writeto[d].is_Null else writeto[d].lower) for d in adims]
        expression = Eq(array[indices], uxreplace(alias, subs))

        # Create the substitution rules so that we can use the newly created
        # temporary in place of the aliasing expressions
        for aliased, distance in zip(aliaseds, distances):
            assert all(i.dim in distance.labels for i in writeto)

            indices = [d - i.lower + distance[i.dim] for d, i in zip(adims, writeto)]
            subs[aliased] = array[indices]

            if aliased in chosen:
                subs[chosen[aliased]] = array[indices]
            else:
                # Perhaps part of a composite alias?
                pass

        # Construct the `alias` IterationSpace
        ispace = cluster.ispace.add(writeto).augment(aliases.index_mapper)

        # Optimization: if possible, the innermost IterationInterval is
        # rounded up to a multiple of the vector length
        try:
            it = ispace.itintervals[-1]
            if ROUNDABLE in cluster.properties[it.dim]:
                vl = platform.simd_items_per_reg(cluster.dtype)
                ispace = ispace.add(Interval(it.dim, 0, it.interval.size % vl))
        except (TypeError, KeyError):
            pass

        # Construct the `alias` DataSpace
        accesses = detect_accesses(expression)
        parts = {k: IntervalGroup(build_intervals(v)).add(ispace.intervals).relaxed
                 for k, v in accesses.items() if k}
        dspace = DataSpace(cluster.dspace.intervals, parts)

        # Finally, build a new Cluster for `alias`
        built = cluster.rebuild(exprs=expression, ispace=ispace, dspace=dspace)
        clusters.insert(0, built)

    return clusters, subs
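
A hand-worked sketch of the substitution rules created above, in plain SymPy (`r` stands for the temporary Array; distances as produced by `aliases.iter`):

# Toy sketch of alias substitution (plain SymPy; illustrative).
from sympy import IndexedBase, symbols

x = symbols('x', integer=True)
a, b, r = IndexedBase('a'), IndexedBase('b'), IndexedBase('r')

# Two aliasing expressions: the same operation, translated by distance 1
aliaseds = [a[x]*b[x], a[x + 1]*b[x + 1]]
distances = [0, 1]

# r[x] = a[x]*b[x] is computed once; each alias then reads `r` at its distance
subs = {al: r[x + d] for al, d in zip(aliaseds, distances)}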
Example no. 10
def lower_schedule(cluster, schedule, sregistry, options):
    """
    Turn a Schedule into a sequence of Clusters.
    """
    ftemps = options['cire-ftemps']

    if ftemps:
        make = TempFunction
    else:
        # Typical case -- the user does *not* "see" the CIRE-created temporaries
        make = Array

    clusters = []
    subs = {}
    for alias, writeto, ispace, aliaseds, indicess in schedule:
        # Basic info to create the temporary that will hold the alias
        name = sregistry.make_name()
        dtype = cluster.dtype

        if writeto:
            # The Dimensions defining the shape of Array
            # Note: with SubDimensions, we may have the following situation:
            #
            # for zi = z_m + zi_ltkn; zi <= z_M - zi_rtkn; ...
            #   r[zi] = ...
            #
            # Instead of `r[zi - z_m - zi_ltkn]` we have just `r[zi]`, so we'll need
            # as much room as in `zi`'s parent to avoid going OOB
            # Aside from producing ugly generated code, shifting the indices would
            # also prevent future passes from transforming the loop bounds
            # (e.g., MPI's comp/comm overlap does that)
            dimensions = [
                d.parent if d.is_Sub else d for d in writeto.itdimensions
            ]

            # The halo must be set according to the size of writeto space
            halo = [(abs(i.lower), abs(i.upper)) for i in writeto]

            # The indices used to write into the Array
            indices = []
            for i in writeto:
                try:
                    # E.g., `xs`
                    sub_iterators = writeto.sub_iterators[i.dim]
                    assert len(sub_iterators) == 1
                    indices.append(sub_iterators[0])
                except KeyError:
                    # E.g., `z` -- a non-shifted Dimension
                    indices.append(i.dim - i.lower)

            obj = make(name=name,
                       dimensions=dimensions,
                       halo=halo,
                       dtype=dtype)
            expression = Eq(obj[indices], alias)

            callback = lambda idx: obj[idx]
        else:
            # Degenerate case: scalar expression
            assert writeto.size == 0

            obj = Symbol(name=name, dtype=dtype)
            expression = Eq(obj, alias)

            callback = lambda idx: obj

        # Create the substitution rules for the aliasing expressions
        subs.update({
            aliased: callback(indices)
            for aliased, indices in zip(aliaseds, indicess)
        })

        # Construct the `alias` DataSpace
        accesses = detect_accesses(expression)
        parts = {
            k: IntervalGroup(build_intervals(v)).add(ispace.intervals).relaxed
            for k, v in accesses.items() if k
        }
        dspace = DataSpace(cluster.dspace.intervals, parts)

        # Drop or weaken parallelism if necessary
        properties = dict(cluster.properties)
        for d, v in cluster.properties.items():
            if any(i.is_Modulo for i in ispace.sub_iterators[d]):
                properties[d] = normalize_properties(v, {SEQUENTIAL})
            elif d not in writeto.dimensions:
                properties[d] = normalize_properties(v, {PARALLEL_IF_PVT})

        # Finally, build the `alias` Cluster
        clusters.append(
            cluster.rebuild(exprs=expression,
                            ispace=ispace,
                            dspace=dspace,
                            properties=properties))

    return clusters, subs
Example no. 11
def _cse(maybe_exprs, make, mode='default'):
    """
    Main common sub-expressions elimination routine.

    Note: the output is guaranteed to be topologically sorted.

    Parameters
    ----------
    maybe_exprs : expr-like or list of expr-like  or Cluster
        One or more expressions to which CSE is applied.
    make : callable
        Build symbols to store temporary, redundant values.
    mode : str, optional
        The CSE algorithm applied. Accepted: ['default'].
    """

    # Note: not defaulting to SymPy's CSE() function for three reasons:
    # - it also captures array index access functions (e.g., i+1 in A[i+1] and B[i+1]);
    # - it sometimes "captures too much", losing factorization opportunities;
    # - it is very slow
    # TODO: a second "sympy" mode will be provided, relying on SymPy's CSE() but
    # also ensuring some form of post-processing
    assert mode == 'default'  # Only supported mode ATM

    # Just for flexibility, accept either Clusters or exprs
    if isinstance(maybe_exprs, Cluster):
        cluster = maybe_exprs
        processed = list(cluster.exprs)
        scope = cluster.scope
    else:
        processed = list(maybe_exprs)
        scope = Scope(maybe_exprs)

    # Some sub-expressions aren't really "common" -- that's the case of Dimension-
    # independent data dependences. For example:
    #
    # ... = ... a[i] + 1 ...
    # a[i] = ...
    # ... = ... a[i] + 1 ...
    #
    # `a[i] + 1` will be excluded, as there's a flow Dimension-independent data
    # dependence involving `a`
    exclude = {i.source.access for i in scope.d_flow.independent()}

    mapped = []
    while True:
        # Detect redundancies
        counted = count(mapped + processed, q_xop).items()
        targets = OrderedDict([(k, estimate_cost(k, True)) for k, v in counted
                               if v > 1])

        # Rule out Dimension-independent data dependencies
        targets = OrderedDict([(k, v) for k, v in targets.items()
                               if not k.free_symbols & exclude])

        if not targets:
            break

        # Create temporaries
        hit = max(targets.values())
        picked = [k for k, v in targets.items() if v == hit]
        mapper = OrderedDict([(e, make()) for e in picked])

        # Apply replacements
        processed = [uxreplace(e, mapper) for e in processed]
        mapped = [uxreplace(e, mapper) for e in mapped]
        mapped = [Eq(v, k) for k, v in reversed(list(mapper.items()))] + mapped

        # Update `exclude` for the same reasons as above -- to rule out CSE across
        # Dimension-independent data dependences
        exclude.update(mapper.values())

        # Prepare for the next round
        for k in picked:
            targets.pop(k)
    processed = mapped + processed

    # At this point we may have useless temporaries (e.g., r0=r1). Let's drop them
    processed = _compact_temporaries(processed)

    return processed
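
A toy version of a single round of the while-loop above, in plain SymPy; the real routine relies on Devito's `count`, `estimate_cost` and `uxreplace`, and on a cost model rather than raw repetition counts:

# Toy single round of the CSE loop (plain SymPy; illustrative).
from collections import Counter
from sympy import Symbol, preorder_traversal, symbols

a, b, c = symbols('a b c')
exprs = [a*b + c, (a*b + c)**2 + a*b]

# Count non-atomic sub-expressions across all expressions
counted = Counter(s for e in exprs for s in preorder_traversal(e)
                  if not s.is_Atom)
targets = [k for k, v in counted.items() if v > 1]  # e.g., a*b, a*b + c

# Hoist each repeated sub-expression into a temporary
mapper = {k: Symbol('r%d' % i) for i, k in enumerate(targets)}
processed = [e.xreplace(mapper) for e in exprs]
temps = [(v, k) for k, v in mapper.items()]  # definitions of the temporaries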
Example no. 12
def lower_schedule(cluster, schedule, chosen, sregistry, options):
    """
    Turn a Schedule into a sequence of Clusters.
    """
    onstack = options['cire-onstack']

    clusters = []
    subs = {}
    for alias, writeto, ispace, aliaseds, indicess in schedule:
        if all(i not in chosen for i in aliaseds):
            continue

        # The Dimensions defining the shape of Array
        # Note: with SubDimensions, we may have the following situation:
        #
        # for zi = z_m + zi_ltkn; zi <= z_M - zi_rtkn; ...
        #   r[zi] = ...
        #
        # Instead of `r[zi - z_m - zi_ltkn]` we have just `r[zi]`, so we'll need
        # as much room as in `zi`'s parent to avoid going OOB
        # Aside from producing ugly generated code, shifting the indices would
        # also prevent future passes from transforming the loop bounds
        # (e.g., MPI's comp/comm overlap does that)
        dimensions = [d.parent if d.is_Sub else d for d in writeto.itdimensions]

        halo = [(abs(i.lower), abs(i.upper)) for i in writeto]

        # The data sharing mode of the Array. It can safely be `shared` only if
        # all of the PARALLEL `cluster` Dimensions appear in `writeto`
        parallel = [d for d, v in cluster.properties.items() if PARALLEL in v]
        sharing = 'shared' if set(parallel) == set(writeto.itdimensions) else 'local'

        # The memory region of the Array. On the heap, unless the user has
        # explicitly requested allocation on the stack
        scope = 'stack' if onstack else 'heap'

        array = Array(name=sregistry.make_name(), dimensions=dimensions, halo=halo,
                      dtype=cluster.dtype, scope=scope, sharing=sharing)

        indices = []
        for i in writeto:
            try:
                # E.g., `xs`
                sub_iterators = writeto.sub_iterators[i.dim]
                assert len(sub_iterators) == 1
                indices.append(sub_iterators[0])
            except KeyError:
                # E.g., `z` -- a non-shifted Dimension
                indices.append(i.dim - i.lower)

        expression = Eq(array[indices], alias)

        # Create the substitution rules so that we can use the newly created
        # temporary in place of the aliasing expressions
        for aliased, indices in zip(aliaseds, indicess):
            subs[aliased] = array[indices]
            if aliased in chosen:
                subs[chosen[aliased]] = array[indices]
            else:
                # Perhaps part of a composite alias?
                pass

        # Construct the `alias` DataSpace
        accesses = detect_accesses(expression)
        parts = {k: IntervalGroup(build_intervals(v)).add(ispace.intervals).relaxed
                 for k, v in accesses.items() if k}
        dspace = DataSpace(cluster.dspace.intervals, parts)

        # Drop parallelism if using ModuloDimensions (due to rotations)
        properties = dict(cluster.properties)
        for d, v in cluster.properties.items():
            if any(i.is_Modulo for i in ispace.sub_iterators[d]):
                properties[d] = normalize_properties(v, {SEQUENTIAL})

        # Finally, build the `alias` Cluster
        clusters.append(cluster.rebuild(exprs=expression, ispace=ispace,
                                        dspace=dspace, properties=properties))

    return clusters, subs
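
In isolation, the sharing decision above amounts to the following rule (illustrative restatement):

# Illustrative restatement of the data-sharing rule above.
def choose_sharing(parallel_dims, writeto_dims):
    # 'shared' is safe only if no PARALLEL Dimension is left out of `writeto`
    return 'shared' if set(parallel_dims) == set(writeto_dims) else 'local'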
Example no. 13
def process(candidates, aliases, cluster, template):
    """
    Create Clusters from aliasing expressions.
    """
    clusters = []
    subs = {}
    for origin, alias in aliases.items():
        if all(i not in candidates for i in alias.aliased):
            continue

        # The write-to Intervals
        writeto = [
            Interval(i.dim, *alias.relaxed_diameter.get(i.dim, (0, 0)))
            for i in cluster.ispace.intervals if not i.dim.is_Time
        ]
        writeto = IntervalGroup(writeto)

        # Optimization: no need to retain a SpaceDimension if it does not
        # induce a flow/anti dependence (below, `i.offsets` captures this, by
        # telling how much halo will be required to honour such dependences)
        dep_inducing = [i for i in writeto if any(i.offsets)]
        try:
            index = writeto.index(dep_inducing[0])
            writeto = IntervalGroup(writeto[index:])
        except IndexError:
            perf_adv("Could not optimize some of the detected redundancies")

        # Create a temporary to store `alias`
        dimensions = [d.root for d in writeto.dimensions]
        halo = [(abs(i.lower), abs(i.upper)) for i in writeto]
        array = Array(name=template(),
                      dimensions=dimensions,
                      halo=halo,
                      dtype=cluster.dtype)

        # Build up the expression evaluating `alias`
        access = tuple(i.dim - i.lower for i in writeto)
        expression = Eq(array[access], origin.xreplace(subs))

        # Create the substitution rules so that we can use the newly created
        # temporary in place of the aliasing expressions
        for aliased, distance in alias.with_distance:
            assert all(i.dim in distance.labels for i in writeto)
            access = [i.dim - i.lower + distance[i.dim] for i in writeto]
            if aliased in candidates:
                # It would *not* be in `candidates` if part of a composite alias
                subs[candidates[aliased]] = array[access]
            subs[aliased] = array[access]

        # Construct the `alias` IterationSpace
        intervals, sub_iterators, directions = cluster.ispace.args
        ispace = IterationSpace(intervals.add(writeto), sub_iterators,
                                directions)

        # Optimize the `alias` IterationSpace: if possible, the innermost
        # IterationInterval is rounded up to a multiple of the vector length
        try:
            it = ispace.itintervals[-1]
            if ROUNDABLE in cluster.properties[it.dim]:
                from devito.parameters import configuration
                vl = configuration['platform'].simd_items_per_reg(
                    cluster.dtype)
                ispace = ispace.add(Interval(it.dim, 0, it.interval.size % vl))
        except (TypeError, KeyError):
            pass

        # Construct the `alias` DataSpace
        mapper = detect_accesses(expression)
        parts = {
            k: IntervalGroup(build_intervals(v)).add(ispace.intervals)
            for k, v in mapper.items() if k
        }
        dspace = DataSpace(cluster.dspace.intervals, parts)

        # Create a new Cluster for `alias`
        clusters.append(
            cluster.rebuild(exprs=[expression], ispace=ispace, dspace=dspace))

    return clusters, subs
Example no. 14
    def _eliminate_inter_stencil_redundancies(self, cluster, template,
                                              **kwargs):
        """
        Search aliasing expressions and capture them into vector temporaries.

        Examples
        --------
        1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
           >>>
           ti[x,y,z] = a[x,y,z] + b[x,y,z]
           temp = ti[x,y,z]*c[t,x,y,z]

        2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
           temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
           >>>
           ti[x,y,z] = a[x,y,z]*b[x,y,z]
           temp1 = 2.0*ti[x,y,z]
           temp2 = 3.0*ti[x,y,z+1]
        """
        exprs = cluster.exprs

        # For more information about "aliases", refer to collect.__doc__
        aliases = collect(exprs)

        # Redundancies will be stored in space-varying temporaries
        is_time_invariant = make_is_time_invariant(exprs)
        time_invariants = {e.rhs: is_time_invariant(e) for e in exprs}

        # Find the candidate expressions
        processed = []
        candidates = OrderedDict()
        for e in exprs:
            # Cost check (to keep the memory footprint under control)
            naliases = len(aliases.get(e.rhs))
            cost = estimate_cost(e, True) * naliases
            test0 = lambda: cost >= self.MIN_COST_ALIAS and naliases > 1
            test1 = lambda: cost >= self.MIN_COST_ALIAS_INV and time_invariants[e.rhs]
            if test0() or test1():
                candidates[e.rhs] = e.lhs
            else:
                processed.append(e)

        # Create alias Clusters and all necessary substitution rules
        # for the new temporaries
        alias_clusters = []
        subs = {}
        for origin, alias in aliases.items():
            if all(i not in candidates for i in alias.aliased):
                continue

            # The write-to Intervals
            writeto = [
                Interval(i.dim, *alias.relaxed_diameter.get(i.dim, (0, 0)))
                for i in cluster.ispace.intervals if not i.dim.is_Time
            ]
            writeto = IntervalGroup(writeto)

            # Optimization: no need to retain a SpaceDimension if it does not
            # induce a flow/anti dependence (below, `i.offsets` captures this, by
            # telling how much halo will be needed to honour such dependences)
            dep_inducing = [i for i in writeto if any(i.offsets)]
            try:
                index = writeto.index(dep_inducing[0])
                writeto = IntervalGroup(writeto[index:])
            except IndexError:
                warning("Couldn't optimize some of the detected redundancies")

            # Create a temporary to store `alias`
            dimensions = [d.root for d in writeto.dimensions]
            halo = [(abs(i.lower), abs(i.upper)) for i in writeto]
            array = Array(name=template(),
                          dimensions=dimensions,
                          halo=halo,
                          dtype=cluster.dtype)

            # Build up the expression evaluating `alias`
            access = tuple(i.dim - i.lower for i in writeto)
            expression = Eq(array[access], origin.xreplace(subs))

            # Create the substitution rules so that we can use the newly created
            # temporary in place of the aliasing expressions
            for aliased, distance in alias.with_distance:
                assert all(i.dim in distance.labels for i in writeto)
                access = [i.dim - i.lower + distance[i.dim] for i in writeto]
                if aliased in candidates:
                    # It would *not* be in `candidates` if part of a composite alias
                    subs[candidates[aliased]] = array[access]
                subs[aliased] = array[access]

            # Construct the `alias` IterationSpace
            intervals, sub_iterators, directions = cluster.ispace.args
            ispace = IterationSpace(intervals.add(writeto), sub_iterators,
                                    directions)

            # Construct the `alias` DataSpace
            mapper = detect_accesses(expression)
            parts = {
                k: IntervalGroup(build_intervals(v)).add(ispace.intervals)
                for k, v in mapper.items() if k
            }
            dspace = DataSpace(cluster.dspace.intervals, parts)

            # Create a new Cluster for `alias`
            alias_clusters.append(
                cluster.rebuild(exprs=[expression],
                                ispace=ispace,
                                dspace=dspace))

        # Switch temporaries in the expression trees
        processed = [e.xreplace(subs) for e in processed]

        return alias_clusters + [cluster.rebuild(processed)]
Example no. 15
    def callback(self, clusters, prefix, cache=None):
        cache = {} if cache is None else cache
        # Locate all Function accesses within the provided `clusters`
        accessmap = AccessMapper(clusters)

        # Create the buffers
        buffers = BufferBatch()
        for f, accessv in accessmap.items():
            # Has a buffer already been produced for `f`?
            if f in cache:
                continue

            # Is `f` really a buffering candidate?
            dims = self.callback0(f)
            if dims is None:
                continue
            if not all(any([i.dim in d._defines for i in prefix]) for d in dims):
                continue

            cache[f] = buffers.make(f, dims, accessv, self.options, self.sregistry)

        if not buffers:
            return clusters

        try:
            pd = prefix[-2].dim
        except IndexError:
            pd = None

        # Create Eqs to initialize buffers. Note: a buffer needs to be initialized
        # only if the buffered Function is read at least once or, in the case
        # of non-uniform SubDimensions, to avoid uninitialized values being
        # copied back into the buffered Function
        noinit = self.options['buf-noinit']
        processed = []
        for b in buffers:
            if b.size == 1 and noinit:
                # Special case: avoid initialization if not strictly necessary
                # See docstring for more info about what this implies
                continue

            if b.is_read or not b.has_uniform_subdims:
                dims = b.function.dimensions
                lhs = b.indexed[[b.initmap.get(d, Map(d, d)).b for d in dims]]
                rhs = b.function[[b.initmap.get(d, Map(d, d)).f for d in dims]]

                expr = lower_exprs(Eq(lhs, rhs))
                ispace = b.writeto
                guards = {pd: GuardBound(d.root.symbolic_min, d.root.symbolic_max)
                          for d in b.contraction_mapper}
                properties = {d: {PARALLEL} for d in ispace.itdimensions}

                processed.append(
                    Cluster(expr, ispace, guards=guards, properties=properties)
                )

        # Substitution rules to replace buffered Functions with buffers
        subs = {}
        for b in buffers:
            for a in b.accessv.accesses:
                subs[a] = b.indexed[[b.index_mapper_flat.get(i, i) for i in a.indices]]

        for c in clusters:
            # If a buffer is read but never written, then we need to add
            # an Eq to step through the next slot
            # E.g., `ub[0, x] = u[time+2, x]`
            for b in buffers:
                if not b.is_readonly:
                    continue
                try:
                    c.exprs.index(b.firstread)
                except ValueError:
                    continue

                dims = b.function.dimensions
                lhs = b.indexed[[b.lastmap.get(d, Map(d, d)).b for d in dims]]
                rhs = b.function[[b.lastmap.get(d, Map(d, d)).f for d in dims]]

                expr = lower_exprs(uxreplace(Eq(lhs, rhs), b.subdims_mapper))
                ispace = b.written

                # Buffering creates a storage-related dependence along the
                # contracted dimensions
                properties = dict(c.properties)
                for d in b.contraction_mapper:
                    d = ispace[d].dim  # E.g., `time_sub -> time`
                    properties[d] = normalize_properties(properties[d], {SEQUENTIAL})

                processed.append(
                    c.rebuild(exprs=expr, ispace=ispace, properties=properties)
                )

            # Substitute buffered Functions with the newly created buffers
            exprs = [uxreplace(e, subs) for e in c.exprs]
            ispace = c.ispace
            for b in buffers:
                ispace = ispace.augment(b.sub_iterators)
            processed.append(c.rebuild(exprs=exprs, ispace=ispace))

            # Also append the copy-back if `c` contains the last-write of some buffers
            # E.g., `u[time + 1, x] = ub[sb1, x]`
            for b in buffers:
                if b.is_readonly:
                    continue
                try:
                    c.exprs.index(b.lastwrite)
                except ValueError:
                    continue

                dims = b.function.dimensions
                lhs = b.function[[b.lastmap.get(d, Map(d, d)).f for d in dims]]
                rhs = b.indexed[[b.lastmap.get(d, Map(d, d)).b for d in dims]]

                expr = lower_exprs(uxreplace(Eq(lhs, rhs), b.subdims_mapper))
                ispace = b.written

                # Buffering creates a storage-related dependence along the
                # contracted dimensions
                properties = dict(c.properties)
                for d in b.contraction_mapper:
                    d = ispace[d].dim  # E.g., `time_sub -> time`
                    properties[d] = normalize_properties(properties[d], {SEQUENTIAL})

                processed.append(
                    c.rebuild(exprs=expr, ispace=ispace, properties=properties)
                )

        return processed
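
Putting the pieces together, for a buffer that is both read and written the Clusters produced by this callback follow the schedule sketched below (pseudo-output; the names `ub`, `t0`, `t1` are assumptions, not actual generated symbols):

# Illustrative shape of the lowered code for a buffered TimeFunction `u`
# with a size-2 buffer `ub` (pseudo-output; names are assumptions):
#
#   ub[t0, x] = u[time_m, x]             # initialization (only if `u` is read)
#   for time = time_m .. time_M:
#       ub[t1, x] = f(ub[t0, x], ...)    # main computation, on the buffer
#       u[time + 1, x] = ub[t1, x]       # copy-back at the buffer's last-write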