Example #1
0
    def callback(self, cgroups, prefix):
        """
        Fuse Clusters sharing the same fusion key, optionally computing a
        topological ordering first to bring fusible Clusters next to
        each other.
        """
        # A topological re-ordering may expose more fusion opportunities
        if self.toposort:
            clusters = self._toposort(cgroups, prefix)
        else:
            clusters = ClusterGroup(cgroups)

        # Attempt fusion within each run of Clusters sharing the same key
        fused = []
        for _, group in groupby(clusters, key=self._key):
            candidates = list(group)
            if len(candidates) > 1:
                try:
                    fused.append(Cluster.from_clusters(*candidates))
                    continue
                except ValueError:
                    # E.g., some of the Clusters have the same iteration
                    # Dimensions but different (partial) orderings -- in
                    # which case fusion is not applicable
                    pass
            fused.extend(candidates)

        return [ClusterGroup(fused, prefix)]
Example #2
0
    def _toposort(self, cgroups, prefix):
        """
        Topologically sort `cgroups` so that ClusterGroups with similar
        keys end up adjacent, thus maximizing later fusion. The input
        order is returned untouched when re-ordering cannot possibly
        enable extra fusion.
        """
        # Fast exit: if no key occurs more than once, there is nothing
        # to fuse, hence no point in computing a new topological ordering
        counter = Counter(self._key(cg) for cg in cgroups)
        if all(v <= 1 for _, v in counter.most_common()):
            return ClusterGroup(cgroups)

        # Fast exit: a single distinct key means all ClusterGroups share
        # the exact same prefix and the same form of synchronization (if
        # any at all), so a topological sorting would change nothing
        if len(counter.most_common()) == 1:
            return ClusterGroup(cgroups)

        dag = self._build_dag(cgroups, prefix)

        def choose_element(queue, scheduled):
            # Heuristic: among the schedulable nodes, pick the one whose key
            # shares the longest leading sub-key with the key of the last
            # scheduled node (i.e., the one with "the most similar key")
            if not scheduled:
                return queue.pop()
            last_key = self._key(scheduled[-1])
            for n in reversed(range(len(last_key) + 1)):
                matching = [cg for cg in queue
                            if self._key(cg)[:n] == last_key[:n]]
                if not matching:
                    continue
                # Break ties via the original input position, for stability
                elem = min(matching, key=cgroups.index)
                queue.remove(elem)
                return elem
            assert False

        return ClusterGroup(dag.topological_sort(choose_element))
Example #3
0
    def _toposort(self, cgroups, prefix):
        """
        Topologically sort `cgroups` so that ClusterGroups sharing the
        same iteration intervals end up adjacent, thus maximizing later
        fusion. The input order is returned untouched when re-ordering
        cannot possibly enable extra fusion.
        """
        # If no itintervals signature occurs more than once, nothing could
        # be fused anyway -- skip the (potentially costly) toposort
        counter = Counter(cg.itintervals for cg in cgroups)
        if all(v <= 1 for _, v in counter.most_common()):
            return ClusterGroup(cgroups)

        # Likewise, a single distinct signature means all ClusterGroups
        # share the exact same prefix -- again, no need to toposort
        if len(counter.most_common()) == 1:
            return ClusterGroup(cgroups)

        dag = self._build_dag(cgroups, prefix)

        def choose_element(queue, scheduled):
            # Heuristic: favor a node with the same IterationSpace as the
            # most recently scheduled one, to maximize Cluster fusion
            if not scheduled:
                return queue.pop()
            target = scheduled[-1].itintervals
            for candidate in list(queue):
                if candidate.itintervals == target:
                    queue.remove(candidate)
                    return candidate
            return queue.popleft()

        return ClusterGroup(dag.topological_sort(choose_element))
Example #4
0
def rewrite(clusters, mode='advanced'):
    """
    Given a sequence of N Clusters, produce a sequence of M Clusters with reduced
    operation count, with M >= N.

    Parameters
    ----------
    clusters : list of Cluster
        The Clusters to be transformed.
    mode : str, optional
        The aggressiveness of the rewrite. Accepted:
        - ``noop``: Do nothing.
        - ``basic``: Apply common sub-expressions elimination.
        - ``advanced``: Apply all transformations that will reduce the
                        operation count w/ minimum increase to the memory pressure,
                        namely 'basic', factorization, and cross-iteration redundancy
                        elimination ("CIRE") for time-invariants only.
        - ``aggressive``: Like 'advanced', but apply CIRE to time-varying
                          sub-expressions too.
                          Further, seek and drop cross-cluster redundancies (this
                          is the only pass that attempts to optimize *across*
                          Clusters, rather than within a Cluster).
                          The 'aggressive' mode may substantially increase the
                          symbolic processing time; it may or may not reduce the
                          JIT-compilation time; it may or may not improve the
                          overall runtime performance.
    """
    if not (mode is None or isinstance(mode, str)):
        raise ValueError("Parameter 'mode' should be a string, not %s." %
                         type(mode))

    if mode is None or mode == 'noop':
        return clusters

    # Dense and sparse Clusters are handled by different rewriters: the
    # non-affine index functions of sparse Clusters make it basically
    # impossible, in general, to apply the more advanced DSE passes.
    # Note: the sparse rewriter uses the same template for temporaries as
    # the dense rewriter, thus temporaries are globally unique
    try:
        rewriter = modes[mode]()
    except KeyError:
        # Not a built-in mode -- interpret it as a custom pass sequence
        rewriter = CustomRewriter(mode)
    fallback = BasicRewriter(False, rewriter.template)

    states = []
    for c in clusters:
        engine = rewriter if c.is_dense else fallback
        states.append(engine.run(c))

    # Emit profiling information
    print_profiling(states)

    # Schedule and optimize the clusters produced by the rewriters
    clusters = ClusterGroup(optimize(flatten(s.clusters for s in states)))

    # Demote unnecessary temporary Arrays to scalars
    clusters = scalarize(clusters, rewriter.template)

    return ClusterGroup(clusters)
Example #5
0
    def _lower_clusters(cls, expressions, profiler, **kwargs):
        """
        Clusters lowering:

            * Group expressions into Clusters;
            * Introduce guards for conditional Clusters.
        """
        # Turn the input sequence of Eqs into a sequence of Clusters
        clusters = clusterize(expressions)

        # Target-specific Cluster-level transformations
        specialized = cls._specialize_clusters(clusters, profiler=profiler,
                                               **kwargs)

        return ClusterGroup(specialized)
Example #6
0
def rewrite(clusters, mode='advanced'):
    """
    Transform N :class:`Cluster` objects of SymPy expressions into M
    :class:`Cluster` objects of SymPy expressions with reduced
    operation count, with M >= N.

    :param clusters: The clusters to be transformed.
    :param mode: drive the expression transformation

    The ``mode`` parameter recognises the following values: ::

         * 'noop': Do nothing.
         * 'basic': Apply common sub-expressions elimination.
         * 'advanced': Apply all transformations that will reduce the
                       operation count w/ minimum increase to the memory pressure,
                       namely 'basic', factorization, CSRE for time-invariants only.
         * 'speculative': Like 'advanced', but apply CSRE also to time-varying
                          sub-expressions, which might further increase the memory
                          pressure.
         * 'aggressive': Like 'speculative', but apply CSRE to any non-trivial
                         sub-expression (i.e., anything that is at least in a
                         sum-of-products form). This may substantially increase
                         the memory pressure.
    """
    if not (mode is None or isinstance(mode, str)):
        raise ValueError("Parameter 'mode' should be a string, not %s." %
                         type(mode))

    if mode is None or mode == 'noop':
        return clusters

    processed = ClusterGroup()
    for cluster in clusters:
        if cluster.is_dense:
            if mode in modes:
                processed.extend(modes[mode]().run(cluster))
            else:
                try:
                    # Fix: `mode` must be forwarded to the CustomRewriter,
                    # otherwise the user-specified custom pass sequence is
                    # silently dropped (cf. the other rewrite variants,
                    # which do `CustomRewriter(mode)`)
                    processed.extend(CustomRewriter(mode).run(cluster))
                except DSEException:
                    dse_warning("Unknown rewrite mode(s) %s" % mode)
                    processed.append(cluster)
        else:
            # Downgrade sparse clusters to basic rewrite mode since it's
            # pointless to expose loop-redundancies when the iteration space
            # only consists of a few points
            processed.extend(BasicRewriter(False).run(cluster))

    return groupby(processed)
def rewrite(clusters, mode='advanced'):
    """
    Transform N :class:`Cluster` objects of SymPy expressions into M
    :class:`Cluster` objects of SymPy expressions with reduced
    operation count, with M >= N.

    :param clusters: The clusters to be transformed.
    :param mode: drive the expression transformation

    The ``mode`` parameter recognises the following values: ::

         * 'noop': Do nothing.
         * 'basic': Apply common sub-expressions elimination.
         * 'advanced': Apply all transformations that will reduce the
                       operation count w/ minimum increase to the memory pressure,
                       namely 'basic', factorization, CIRE for time-invariants only.
         * 'speculative': Like 'advanced', but apply CIRE also to time-varying
                          sub-expressions, which might further increase the memory
                          pressure.
         * 'aggressive': Like 'speculative', but apply CIRE to any non-trivial
                         sub-expression (i.e., anything that is at least in a
                         sum-of-products form). This may substantially increase
                         the memory pressure.
    """
    if not (mode is None or isinstance(mode, str)):
        raise ValueError("Parameter 'mode' should be a string, not %s." %
                         type(mode))

    if mode is None or mode == 'noop':
        return clusters
    elif mode not in modes:
        dse_warning("Unknown rewrite mode(s) %s" % mode)
        return clusters

    # Dense and sparse clusters get separate rewriters; sparse clusters have
    # non-affine index functions, thus making it basically impossible, in
    # general, to apply the more advanced DSE passes.
    # Note: the sparse rewriter uses the same template for temporaries as
    # the dense rewriter, thus temporaries are globally unique
    rewriter = modes[mode]()
    fallback = BasicRewriter(False, rewriter.template)

    rewritten = []
    for c in clusters:
        engine = rewriter if c.is_dense else fallback
        rewritten.extend(engine.run(c))
    processed = ClusterGroup(rewritten)

    return groupby(processed).finalize()
Example #8
0
    def _lower_clusters(cls, expressions, profiler, **kwargs):
        """
        Clusters lowering:

            * Group expressions into Clusters;
            * Introduce guards for conditional Clusters;
            * Analyze Clusters to detect computational properties such
              as parallelism.
        """
        # Turn the input sequence of Eqs into a sequence of Clusters
        clusters = clusterize(expressions)

        # Track how specialization impacts the operation count of the
        # dense Clusters
        def count_ops(cs):
            return sum(estimate_cost(c.exprs) for c in cs if c.is_dense)

        init_ops = count_ops(clusters)

        clusters = cls._specialize_clusters(clusters, **kwargs)

        profiler.record_ops_variation(init_ops, count_ops(clusters))

        return ClusterGroup(clusters)
Example #9
0
def rewrite(clusters, mode='advanced'):
    """
    Given a sequence of N Clusters, produce a sequence of M Clusters with reduced
    operation count, with M >= N.

    Parameters
    ----------
    clusters : list of Cluster
        The Clusters to be transformed.
    mode : str, optional
        The aggressiveness of the rewrite. Accepted:
        - ``noop``: Do nothing.
        - ``basic``: Apply common sub-expressions elimination.
        - ``advanced``: Apply all transformations that will reduce the
                        operation count w/ minimum increase to the memory pressure,
                        namely 'basic', factorization, CIRE for time-invariants only.
        - ``speculative``: Like 'advanced', but apply CIRE also to time-varying
                           sub-expressions, which might further increase the memory
                           pressure.
        - ``aggressive``: Like 'speculative', but apply CIRE to any non-trivial
                          sub-expression (i.e., anything that is at least in a
                          sum-of-product form).
                          Further, seek and drop cross-cluster redundancies (this
                          is the only pass that attempts to optimize *across*
                          clusters, rather than within a cluster).
                          The 'aggressive' mode may substantially increase the
                          symbolic processing time; it may or may not reduce the
                          JIT-compilation time; it may or may not improve the
                          overall runtime performance.
    """
    if not (mode is None or isinstance(mode, str)):
        raise ValueError("Parameter 'mode' should be a string, not %s." %
                         type(mode))

    if mode is None or mode == 'noop':
        return clusters
    elif mode not in modes:
        dse_warning("Unknown rewrite mode(s) %s" % mode)
        return clusters

    # 1) Local optimization
    # ---------------------
    # We use separate rewriters for dense and sparse clusters; sparse clusters have
    # non-affine index functions, thus making it basically impossible, in general,
    # to apply the more advanced DSE passes.
    # Note: the sparse rewriter uses the same template for temporaries as
    # the dense rewriter, thus temporaries are globally unique
    rewriter = modes[mode]()
    fallback = BasicRewriter(False, rewriter.template)

    processed = ClusterGroup(
        flatten(
            rewriter.run(c) if c.is_dense else fallback.run(c)
            for c in clusters))

    # 2) Cluster grouping
    # -------------------
    # Different clusters may have created new (smaller) clusters which are
    # potentially groupable within a single cluster
    processed = groupby(processed)

    # 3) Global optimization
    # ----------------------
    # After grouping, there may be redundancies in one or more clusters. This final
    # pass searches and drops such redundancies
    if mode == 'aggressive':
        processed = cross_cluster_cse(processed)

    return processed.finalize()
Example #10
0
 def process(self, clusters):
     """
     Wrap each Cluster into its own ClusterGroup, process the groups,
     and flatten the result back into a sequence of Clusters.
     """
     wrapped = [ClusterGroup(c, c.itintervals) for c in clusters]
     processed = self._process_fdta(wrapped, 1)
     return ClusterGroup.concatenate(*processed)