예제 #1
0
파일: aliases.py 프로젝트: ofmla/devito
    def _aliases_from_clusters(self, clusters, exclude, meta):
        exprs = flatten([c.exprs for c in clusters])

        # [Clusters]_n -> [Schedule]_m
        variants = []
        for mapper in self._generate(exprs, exclude):
            # Clusters -> AliasList
            found = collect(mapper.extracted, meta.ispace, self.opt_minstorage)
            pexprs, aliases = choose(found, exprs, mapper, self.opt_mingain)
            if not aliases:
                continue

            # AliasList -> Schedule
            schedule = lower_aliases(aliases, meta, self.opt_maxpar)

            variants.append(Variant(schedule, pexprs))

        # [Schedule]_m -> Schedule (s.t. best memory/flops trade-off)
        if not variants:
            return []
        schedule, exprs = pick_best(variants, self.opt_schedule_strategy,
                                    self._eval_variants_delta)

        # Schedule -> Schedule (optimization)
        if self.opt_rotate:
            schedule = optimize_schedule_rotations(schedule, self.sregistry)
        schedule = optimize_schedule_padding(schedule, meta, self.platform)

        # Schedule -> [Clusters]_k
        processed, subs = lower_schedule(schedule, meta, self.sregistry,
                                         self.opt_ftemps)

        # [Clusters]_k -> [Clusters]_k (optimization)
        if self.opt_multisubdomain:
            processed = optimize_clusters_msds(processed)

        # [Clusters]_k -> [Clusters]_{k+n}
        for c in clusters:
            n = len(c.exprs)
            cexprs, exprs = exprs[:n], exprs[n:]

            cexprs = [uxreplace(e, subs) for e in cexprs]

            ispace = c.ispace.augment(schedule.dmapper)
            ispace = ispace.augment(schedule.rmapper)

            accesses = detect_accesses(cexprs)
            parts = {
                k: IntervalGroup(build_intervals(v)).relaxed
                for k, v in accesses.items() if k
            }
            dspace = DataSpace(c.dspace.intervals, parts)

            processed.append(
                c.rebuild(exprs=cexprs, ispace=ispace, dspace=dspace))

        assert len(exprs) == 0

        return processed
예제 #2
0
파일: buffering.py 프로젝트: ofmla/devito
def derive_dspace(expr, ispace):
    accesses = detect_accesses(expr)
    parts = {
        k: IntervalGroup(build_intervals(v)).relaxed
        for k, v in accesses.items() if k
    }
    dspace = DataSpace(ispace.intervals, parts)

    return dspace
예제 #3
0
def rebuild(cluster, others, aliases, subs):
    # Rebuild the non-aliasing expressions
    exprs = [uxreplace(e, subs) for e in others]

    # Add any new ShiftedDimension to the IterationSpace
    ispace = cluster.ispace.augment(aliases.index_mapper)

    # Rebuild the DataSpace to include the new symbols
    accesses = detect_accesses(exprs)
    parts = {k: IntervalGroup(build_intervals(v)).relaxed
             for k, v in accesses.items() if k}
    dspace = DataSpace(cluster.dspace.intervals, parts)

    return cluster.rebuild(exprs=exprs, ispace=ispace, dspace=dspace)
예제 #4
0
def rebuild(cluster, others, schedule, subs):
    """
    Plug the optimized aliases into the input Cluster. This leads to creating
    a new Cluster with suitable IterationSpace and DataSpace.
    """
    exprs = [uxreplace(e, subs) for e in others]

    ispace = cluster.ispace.augment(schedule.dmapper)
    ispace = ispace.augment(schedule.rmapper)

    accesses = detect_accesses(exprs)
    parts = {k: IntervalGroup(build_intervals(v)).relaxed
             for k, v in accesses.items() if k}
    dspace = DataSpace(cluster.dspace.intervals, parts)

    return cluster.rebuild(exprs=exprs, ispace=ispace, dspace=dspace)
예제 #5
0
파일: aliases.py 프로젝트: ofmla/devito
def optimize_clusters_msds(clusters):
    """
    Relax the clusters by letting the expressions defined over MultiSubDomains to
    rather be computed over the entire domain. This increases the likelihood of
    code lifting by later passes.
    """
    processed = []
    for c in clusters:
        msds = [
            d for d in c.ispace.itdimensions
            if isinstance(d, MultiSubDimension)
        ]

        if msds:
            mapper = {d: d.root for d in msds}
            exprs = [uxreplace(e, mapper) for e in c.exprs]

            ispace = c.ispace.relaxed(msds)

            accesses = detect_accesses(exprs)
            parts = {
                k: IntervalGroup(build_intervals(v)).relaxed
                for k, v in accesses.items() if k
            }
            intervals = [i for i in c.dspace if i.dim not in msds]
            dspace = DataSpace(intervals, parts)

            guards = {mapper.get(d, d): v for d, v in c.guards.items()}
            properties = {mapper.get(d, d): v for d, v in c.properties.items()}
            syncs = {mapper.get(d, d): v for d, v in c.syncs.items()}

            processed.append(
                c.rebuild(exprs=exprs,
                          ispace=ispace,
                          dspace=dspace,
                          guards=guards,
                          properties=properties,
                          syncs=syncs))
        else:
            processed.append(c)

    return processed
예제 #6
0
    def _eliminate_inter_stencil_redundancies(self, cluster, template,
                                              **kwargs):
        """
        Search aliasing expressions and capture them into vector temporaries.

        Examples
        --------
        1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
           >>>
           ti[x,y,z] = a[x,y,z] + b[x,y,z]
           temp = ti[x,y,z]*c[t,x,y,z]

        2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
           temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
           >>>
           ti[x,y,z] = a[x,y,z]*b[x,y,z]
           temp1 = 2.0*ti[x,y,z]
           temp2 = 3.0*ti[x,y,z+1]
        """
        # For more information about "aliases", refer to collect.__doc__
        aliases = collect(cluster.exprs)

        # Redundancies will be stored in space-varying temporaries
        graph = FlowGraph(cluster.exprs)
        time_invariants = {
            v.rhs: graph.time_invariant(v)
            for v in graph.values()
        }

        # Find the candidate expressions
        processed = []
        candidates = OrderedDict()
        for k, v in graph.items():
            # Cost check (to keep the memory footprint under control)
            naliases = len(aliases.get(v.rhs))
            cost = estimate_cost(v, True) * naliases
            test0 = lambda: cost >= self.MIN_COST_ALIAS and naliases > 1
            test1 = lambda: cost >= self.MIN_COST_ALIAS_INV and time_invariants[
                v.rhs]
            if test0() or test1():
                candidates[v.rhs] = k
            else:
                processed.append(v)

        # Create alias Clusters and all necessary substitution rules
        # for the new temporaries
        alias_clusters = []
        subs = {}
        for origin, alias in aliases.items():
            if all(i not in candidates for i in alias.aliased):
                continue

            # The write-to Intervals
            writeto = [
                Interval(i.dim, *alias.relaxed_diameter.get(i.dim, (0, 0)))
                for i in cluster.ispace.intervals if not i.dim.is_Time
            ]
            writeto = IntervalGroup(writeto)

            # Optimization: no need to retain a SpaceDimension if it does not
            # induce a flow/anti dependence (below, `i.offsets` captures this, by
            # telling how much halo will be needed to honour such dependences)
            dep_inducing = [i for i in writeto if any(i.offsets)]
            try:
                index = writeto.index(dep_inducing[0])
                writeto = IntervalGroup(writeto[index:])
            except IndexError:
                warning("Couldn't optimize some of the detected redundancies")

            # Create a temporary to store `alias`
            dimensions = [d.root for d in writeto.dimensions]
            halo = [(abs(i.lower), abs(i.upper)) for i in writeto]
            array = Array(name=template(),
                          dimensions=dimensions,
                          halo=halo,
                          dtype=cluster.dtype)

            # Build up the expression evaluating `alias`
            access = tuple(i.dim - i.lower for i in writeto)
            expression = Eq(array[access], origin)

            # Create the substitution rules so that we can use the newly created
            # temporary in place of the aliasing expressions
            for aliased, distance in alias.with_distance:
                assert all(i.dim in distance.labels for i in writeto)
                access = [i.dim - i.lower + distance[i.dim] for i in writeto]
                if aliased in candidates:
                    # It would *not* be in `candidates` if part of a composite alias
                    subs[candidates[aliased]] = array[access]
                subs[aliased] = array[access]

            # Construct the `alias` IterationSpace
            intervals, sub_iterators, directions = cluster.ispace.args
            ispace = IterationSpace(intervals.add(writeto), sub_iterators,
                                    directions)

            # Construct the `alias` DataSpace
            mapper = detect_accesses(expression)
            parts = {
                k: IntervalGroup(build_intervals(v)).add(ispace.intervals)
                for k, v in mapper.items() if k
            }
            dspace = DataSpace(cluster.dspace.intervals, parts)

            # Create a new Cluster for `alias`
            alias_clusters.append(Cluster([expression], ispace, dspace))

        # Switch temporaries in the expression trees
        processed = [e.xreplace(subs) for e in processed]

        return alias_clusters + [cluster.rebuild(processed)]
예제 #7
0
def process(cluster, chosen, aliases, sregistry, platform):
    clusters = []
    subs = {}
    for alias, writeto, aliaseds, distances in aliases.iter(cluster.ispace):
        if all(i not in chosen for i in aliaseds):
            continue

        # The Dimensions defining the shape of Array
        # Note: with SubDimensions, we may have the following situation:
        #
        # for zi = z_m + zi_ltkn; zi <= z_M - zi_rtkn; ...
        #   r[zi] = ...
        #
        # Instead of `r[zi - z_m - zi_ltkn]` we have just `r[zi]`, so we'll need
        # as much room as in `zi`'s parent to avoid going OOB
        # Aside from ugly generated code, the reason we do not rather shift the
        # indices is that it prevents future passes to transform the loop bounds
        # (e.g., MPI's comp/comm overlap does that)
        dimensions = [d.parent if d.is_Sub else d for d in writeto.dimensions]

        # The halo of the Array
        halo = [(abs(i.lower), abs(i.upper)) for i in writeto]

        # The data sharing mode of the Array
        sharing = 'local' if any(d.is_Incr for d in writeto.dimensions) else 'shared'

        # Finally create the temporary Array that will store `alias`
        array = Array(name=sregistry.make_name(), dimensions=dimensions, halo=halo,
                      dtype=cluster.dtype, sharing=sharing)

        # The access Dimensions may differ from `writeto.dimensions`. This may
        # happen e.g. if ShiftedDimensions are introduced (`a[x,y]` -> `a[xs,y]`)
        adims = [aliases.index_mapper.get(d, d) for d in writeto.dimensions]

        # The expression computing `alias`
        adims = [aliases.index_mapper.get(d, d) for d in writeto.dimensions]  # x -> xs
        indices = [d - (0 if writeto[d].is_Null else writeto[d].lower) for d in adims]
        expression = Eq(array[indices], uxreplace(alias, subs))

        # Create the substitution rules so that we can use the newly created
        # temporary in place of the aliasing expressions
        for aliased, distance in zip(aliaseds, distances):
            assert all(i.dim in distance.labels for i in writeto)

            indices = [d - i.lower + distance[i.dim] for d, i in zip(adims, writeto)]
            subs[aliased] = array[indices]

            if aliased in chosen:
                subs[chosen[aliased]] = array[indices]
            else:
                # Perhaps part of a composite alias ?
                pass

        # Construct the `alias` IterationSpace
        ispace = cluster.ispace.add(writeto).augment(aliases.index_mapper)

        # Optimization: if possible, the innermost IterationInterval is
        # rounded up to a multiple of the vector length
        try:
            it = ispace.itintervals[-1]
            if ROUNDABLE in cluster.properties[it.dim]:
                vl = platform.simd_items_per_reg(cluster.dtype)
                ispace = ispace.add(Interval(it.dim, 0, it.interval.size % vl))
        except (TypeError, KeyError):
            pass

        # Construct the `alias` DataSpace
        accesses = detect_accesses(expression)
        parts = {k: IntervalGroup(build_intervals(v)).add(ispace.intervals).relaxed
                 for k, v in accesses.items() if k}
        dspace = DataSpace(cluster.dspace.intervals, parts)

        # Finally, build a new Cluster for `alias`
        built = cluster.rebuild(exprs=expression, ispace=ispace, dspace=dspace)
        clusters.insert(0, built)

    return clusters, subs
예제 #8
0
def lower_schedule(cluster, schedule, sregistry, options):
    """
    Turn a Schedule into a sequence of Clusters.
    """
    ftemps = options['cire-ftemps']

    if ftemps:
        make = TempFunction
    else:
        # Typical case -- the user does *not* "see" the CIRE-created temporaries
        make = Array

    clusters = []
    subs = {}
    for alias, writeto, ispace, aliaseds, indicess in schedule:
        # Basic info to create the temporary that will hold the alias
        name = sregistry.make_name()
        dtype = cluster.dtype

        if writeto:
            # The Dimensions defining the shape of Array
            # Note: with SubDimensions, we may have the following situation:
            #
            # for zi = z_m + zi_ltkn; zi <= z_M - zi_rtkn; ...
            #   r[zi] = ...
            #
            # Instead of `r[zi - z_m - zi_ltkn]` we have just `r[zi]`, so we'll need
            # as much room as in `zi`'s parent to avoid going OOB
            # Aside from ugly generated code, the reason we do not rather shift the
            # indices is that it prevents future passes to transform the loop bounds
            # (e.g., MPI's comp/comm overlap does that)
            dimensions = [
                d.parent if d.is_Sub else d for d in writeto.itdimensions
            ]

            # The halo must be set according to the size of writeto space
            halo = [(abs(i.lower), abs(i.upper)) for i in writeto]

            # The indices used to write into the Array
            indices = []
            for i in writeto:
                try:
                    # E.g., `xs`
                    sub_iterators = writeto.sub_iterators[i.dim]
                    assert len(sub_iterators) == 1
                    indices.append(sub_iterators[0])
                except KeyError:
                    # E.g., `z` -- a non-shifted Dimension
                    indices.append(i.dim - i.lower)

            obj = make(name=name,
                       dimensions=dimensions,
                       halo=halo,
                       dtype=dtype)
            expression = Eq(obj[indices], alias)

            callback = lambda idx: obj[idx]
        else:
            # Degenerate case: scalar expression
            assert writeto.size == 0

            obj = Symbol(name=name, dtype=dtype)
            expression = Eq(obj, alias)

            callback = lambda idx: obj

        # Create the substitution rules for the aliasing expressions
        subs.update({
            aliased: callback(indices)
            for aliased, indices in zip(aliaseds, indicess)
        })

        # Construct the `alias` DataSpace
        accesses = detect_accesses(expression)
        parts = {
            k: IntervalGroup(build_intervals(v)).add(ispace.intervals).relaxed
            for k, v in accesses.items() if k
        }
        dspace = DataSpace(cluster.dspace.intervals, parts)

        # Drop or weaken parallelism if necessary
        properties = dict(cluster.properties)
        for d, v in cluster.properties.items():
            if any(i.is_Modulo for i in ispace.sub_iterators[d]):
                properties[d] = normalize_properties(v, {SEQUENTIAL})
            elif d not in writeto.dimensions:
                properties[d] = normalize_properties(v, {PARALLEL_IF_PVT})

        # Finally, build the `alias` Cluster
        clusters.append(
            cluster.rebuild(exprs=expression,
                            ispace=ispace,
                            dspace=dspace,
                            properties=properties))

    return clusters, subs
예제 #9
0
    def _eliminate_inter_stencil_redundancies(self, cluster, template,
                                              **kwargs):
        """
        Search for redundancies across the expressions and expose them
        to the later stages of the optimisation pipeline by introducing
        new temporaries of suitable rank.

        Two type of redundancies are sought:

            * Time-invariants, and
            * Across different space points

        Examples
        ========
        Let ``t`` be the time dimension, ``x, y, z`` the space dimensions. Then:

        1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
           >>>
           ti[x,y,z] = a[x,y,z] + b[x,y,z]
           temp = ti[x,y,z]*c[t,x,y,z]

        2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
           temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
           >>>
           ti[x,y,z] = a[x,y,z]*b[x,y,z]
           temp1 = 2.0*ti[x,y,z]
           temp2 = 3.0*ti[x,y,z+1]
        """
        if cluster.is_sparse:
            return cluster

        # For more information about "aliases", refer to collect.__doc__
        mapper, aliases = collect(cluster.exprs)

        # Redundancies will be stored in space-varying temporaries
        g = cluster.trace
        indices = g.space_indices
        time_invariants = {v.rhs: g.time_invariant(v) for v in g.values()}

        # Find the candidate expressions
        processed = []
        candidates = OrderedDict()
        for k, v in g.items():
            # Cost check (to keep the memory footprint under control)
            naliases = len(mapper.get(v.rhs, []))
            cost = estimate_cost(v, True) * naliases
            if cost >= self.thresholds['min-cost-alias'] and\
                    (naliases > 1 or time_invariants[v.rhs]):
                candidates[v.rhs] = k
            else:
                processed.append(v)

        # Create alias Clusters and all necessary substitution rules
        # for the new temporaries
        alias_clusters = ClusterGroup()
        rules = OrderedDict()
        for origin, alias in aliases.items():
            if all(i not in candidates for i in alias.aliased):
                continue
            # Construct an iteration space suitable for /alias/
            intervals, sub_iterators, directions = cluster.ispace.args
            intervals = [
                Interval(i.dim, *alias.relaxed_diameter.get(i.dim, i.limits))
                for i in cluster.ispace.intervals
            ]
            ispace = IterationSpace(intervals, sub_iterators, directions)

            # Optimization: perhaps we can lift the cluster outside the time dimension
            if all(time_invariants[i] for i in alias.aliased):
                ispace = ispace.project(lambda i: not i.is_Time)

            # Build a symbolic function for /alias/
            intervals = ispace.intervals
            halo = [(abs(intervals[i].lower), abs(intervals[i].upper))
                    for i in indices]
            function = Array(name=template(), dimensions=indices, halo=halo)
            access = tuple(i - intervals[i].lower for i in indices)
            expression = Eq(Indexed(function.indexed, *access), origin)

            # Construct a data space suitable for /alias/
            mapper = detect_accesses(expression)
            parts = {
                k: IntervalGroup(build_intervals(v)).add(intervals)
                for k, v in mapper.items() if k
            }
            dspace = DataSpace([i.zero() for i in intervals], parts)

            # Create a new Cluster for /alias/
            alias_clusters.append(Cluster([expression], ispace, dspace))

            # Add substitution rules
            for aliased, distance in alias.with_distance:
                access = [
                    i - intervals[i].lower + j for i, j in distance
                    if i in indices
                ]
                temporary = Indexed(function.indexed, *tuple(access))
                rules[candidates[aliased]] = temporary
                rules[aliased] = temporary

        # Group clusters together if possible
        alias_clusters = groupby(alias_clusters).finalize()
        alias_clusters.sort(key=lambda i: i.is_dense)

        # Switch temporaries in the expression trees
        processed = [e.xreplace(rules) for e in processed]

        return alias_clusters + [cluster.rebuild(processed)]
예제 #10
0
파일: advanced.py 프로젝트: opesci/devito
    def _eliminate_inter_stencil_redundancies(self, cluster, template, **kwargs):
        """
        Search for redundancies across the expressions and expose them
        to the later stages of the optimisation pipeline by introducing
        new temporaries of suitable rank.

        Two type of redundancies are sought:

            * Time-invariants, and
            * Across different space points

        Examples
        ========
        Let ``t`` be the time dimension, ``x, y, z`` the space dimensions. Then:

        1) temp = (a[x,y,z]+b[x,y,z])*c[t,x,y,z]
           >>>
           ti[x,y,z] = a[x,y,z] + b[x,y,z]
           temp = ti[x,y,z]*c[t,x,y,z]

        2) temp1 = 2.0*a[x,y,z]*b[x,y,z]
           temp2 = 3.0*a[x,y,z+1]*b[x,y,z+1]
           >>>
           ti[x,y,z] = a[x,y,z]*b[x,y,z]
           temp1 = 2.0*ti[x,y,z]
           temp2 = 3.0*ti[x,y,z+1]
        """
        if cluster.is_sparse:
            return cluster

        # For more information about "aliases", refer to collect.__doc__
        mapper, aliases = collect(cluster.exprs)

        # Redundancies will be stored in space-varying temporaries
        g = cluster.trace
        indices = g.space_indices
        time_invariants = {v.rhs: g.time_invariant(v) for v in g.values()}

        # Find the candidate expressions
        processed = []
        candidates = OrderedDict()
        for k, v in g.items():
            # Cost check (to keep the memory footprint under control)
            naliases = len(mapper.get(v.rhs, []))
            cost = estimate_cost(v, True)*naliases
            if cost >= self.MIN_COST_ALIAS and (naliases > 1 or time_invariants[v.rhs]):
                candidates[v.rhs] = k
            else:
                processed.append(v)

        # Create alias Clusters and all necessary substitution rules
        # for the new temporaries
        alias_clusters = ClusterGroup()
        rules = OrderedDict()
        for origin, alias in aliases.items():
            if all(i not in candidates for i in alias.aliased):
                continue
            # Construct an iteration space suitable for /alias/
            intervals, sub_iterators, directions = cluster.ispace.args
            intervals = [Interval(i.dim, *alias.relaxed_diameter.get(i.dim, i.limits))
                         for i in cluster.ispace.intervals]
            ispace = IterationSpace(intervals, sub_iterators, directions)

            # Optimization: perhaps we can lift the cluster outside the time dimension
            if all(time_invariants[i] for i in alias.aliased):
                ispace = ispace.project(lambda i: not i.is_Time)

            # Build a symbolic function for /alias/
            intervals = ispace.intervals
            halo = [(abs(intervals[i].lower), abs(intervals[i].upper)) for i in indices]
            function = Array(name=template(), dimensions=indices, halo=halo)
            access = tuple(i - intervals[i].lower for i in indices)
            expression = Eq(function[access], origin)

            # Construct a data space suitable for /alias/
            mapper = detect_accesses(expression)
            parts = {k: IntervalGroup(build_intervals(v)).add(intervals)
                     for k, v in mapper.items() if k}
            dspace = DataSpace([i.zero() for i in intervals], parts)

            # Create a new Cluster for /alias/
            alias_clusters.append(Cluster([expression], ispace, dspace))

            # Add substitution rules
            for aliased, distance in alias.with_distance:
                access = [i - intervals[i].lower + j for i, j in distance if i in indices]
                rules[candidates[aliased]] = function[access]
                rules[aliased] = function[access]

        # Group clusters together if possible
        alias_clusters = groupby(alias_clusters).finalize()
        alias_clusters.sort(key=lambda i: i.is_dense)

        # Switch temporaries in the expression trees
        processed = [e.xreplace(rules) for e in processed]

        return alias_clusters + [cluster.rebuild(processed)]
예제 #11
0
def lower_schedule(cluster, schedule, chosen, sregistry, options):
    """
    Turn a Schedule into a sequence of Clusters.
    """
    onstack = options['cire-onstack']

    clusters = []
    subs = {}
    for alias, writeto, ispace, aliaseds, indicess in schedule:
        if all(i not in chosen for i in aliaseds):
            continue

        # The Dimensions defining the shape of Array
        # Note: with SubDimensions, we may have the following situation:
        #
        # for zi = z_m + zi_ltkn; zi <= z_M - zi_rtkn; ...
        #   r[zi] = ...
        #
        # Instead of `r[zi - z_m - zi_ltkn]` we have just `r[zi]`, so we'll need
        # as much room as in `zi`'s parent to avoid going OOB
        # Aside from ugly generated code, the reason we do not rather shift the
        # indices is that it prevents future passes to transform the loop bounds
        # (e.g., MPI's comp/comm overlap does that)
        dimensions = [d.parent if d.is_Sub else d for d in writeto.itdimensions]

        halo = [(abs(i.lower), abs(i.upper)) for i in writeto]

        # The data sharing mode of the Array. It can safely be `shared` only if
        # all of the PARALLEL `cluster` Dimensions appear in `writeto`
        parallel = [d for d, v in cluster.properties.items() if PARALLEL in v]
        sharing = 'shared' if set(parallel) == set(writeto.itdimensions) else 'local'

        # The memory region of the Array. On the heap, unless the user has
        # explicitly requested allocation on the stack
        scope = 'stack' if onstack else 'heap'

        array = Array(name=sregistry.make_name(), dimensions=dimensions, halo=halo,
                      dtype=cluster.dtype, scope=scope, sharing=sharing)

        indices = []
        for i in writeto:
            try:
                # E.g., `xs`
                sub_iterators = writeto.sub_iterators[i.dim]
                assert len(sub_iterators) == 1
                indices.append(sub_iterators[0])
            except KeyError:
                # E.g., `z` -- a non-shifted Dimension
                indices.append(i.dim - i.lower)

        expression = Eq(array[indices], alias)

        # Create the substitution rules so that we can use the newly created
        # temporary in place of the aliasing expressions
        for aliased, indices in zip(aliaseds, indicess):
            subs[aliased] = array[indices]
            if aliased in chosen:
                subs[chosen[aliased]] = array[indices]
            else:
                # Perhaps part of a composite alias ?
                pass

        # Construct the `alias` DataSpace
        accesses = detect_accesses(expression)
        parts = {k: IntervalGroup(build_intervals(v)).add(ispace.intervals).relaxed
                 for k, v in accesses.items() if k}
        dspace = DataSpace(cluster.dspace.intervals, parts)

        # Drop parallelism if using ModuloDimensions (due to rotations)
        properties = dict(cluster.properties)
        for d, v in cluster.properties.items():
            if any(i.is_Modulo for i in ispace.sub_iterators[d]):
                properties[d] = normalize_properties(v, {SEQUENTIAL})

        # Finally, build the `alias` Cluster
        clusters.append(cluster.rebuild(exprs=expression, ispace=ispace,
                                        dspace=dspace, properties=properties))

    return clusters, subs
예제 #12
0
def process(candidates, aliases, cluster, template):
    """
    Create Clusters from aliasing expressions.
    """
    clusters = []
    subs = {}
    for origin, alias in aliases.items():
        if all(i not in candidates for i in alias.aliased):
            continue

        # The write-to Intervals
        writeto = [
            Interval(i.dim, *alias.relaxed_diameter.get(i.dim, (0, 0)))
            for i in cluster.ispace.intervals if not i.dim.is_Time
        ]
        writeto = IntervalGroup(writeto)

        # Optimization: no need to retain a SpaceDimension if it does not
        # induce a flow/anti dependence (below, `i.offsets` captures this, by
        # telling how much halo will be required to honour such dependences)
        dep_inducing = [i for i in writeto if any(i.offsets)]
        try:
            index = writeto.index(dep_inducing[0])
            writeto = IntervalGroup(writeto[index:])
        except IndexError:
            perf_adv("Could not optimize some of the detected redundancies")

        # Create a temporary to store `alias`
        dimensions = [d.root for d in writeto.dimensions]
        halo = [(abs(i.lower), abs(i.upper)) for i in writeto]
        array = Array(name=template(),
                      dimensions=dimensions,
                      halo=halo,
                      dtype=cluster.dtype)

        # Build up the expression evaluating `alias`
        access = tuple(i.dim - i.lower for i in writeto)
        expression = Eq(array[access], origin.xreplace(subs))

        # Create the substitution rules so that we can use the newly created
        # temporary in place of the aliasing expressions
        for aliased, distance in alias.with_distance:
            assert all(i.dim in distance.labels for i in writeto)
            access = [i.dim - i.lower + distance[i.dim] for i in writeto]
            if aliased in candidates:
                # It would *not* be in `candidates` if part of a composite alias
                subs[candidates[aliased]] = array[access]
            subs[aliased] = array[access]

        # Construct the `alias` IterationSpace
        intervals, sub_iterators, directions = cluster.ispace.args
        ispace = IterationSpace(intervals.add(writeto), sub_iterators,
                                directions)

        # Optimize the `alias` IterationSpace: if possible, the innermost
        # IterationInterval is rounded up to a multiple of the vector length
        try:
            it = ispace.itintervals[-1]
            if ROUNDABLE in cluster.properties[it.dim]:
                from devito.parameters import configuration
                vl = configuration['platform'].simd_items_per_reg(
                    cluster.dtype)
                ispace = ispace.add(Interval(it.dim, 0, it.interval.size % vl))
        except (TypeError, KeyError):
            pass

        # Construct the `alias` DataSpace
        mapper = detect_accesses(expression)
        parts = {
            k: IntervalGroup(build_intervals(v)).add(ispace.intervals)
            for k, v in mapper.items() if k
        }
        dspace = DataSpace(cluster.dspace.intervals, parts)

        # Create a new Cluster for `alias`
        clusters.append(
            cluster.rebuild(exprs=[expression], ispace=ispace, dspace=dspace))

    return clusters, subs