def _specialize_clusters(cls, clusters, **kwargs):
    """
    Run the Cluster optimization pipeline and return the resulting Clusters.

    The passes are applied in a fixed order: Toposort, fusion, DSE
    rewriting, lifting, a second fusion, array elimination and
    scalarization. ``kwargs`` is forwarded unchanged to ``rewrite``.
    """
    # Temporaries are named "r<N>", with N drawn from a single counter that
    # is shared by every pass below
    next_index = generator()

    def template():
        return "r%d" % next_index()

    # Toposort comes first because it exposes more fusion opportunities
    clusters = fuse(Toposort().process(clusters))

    # The DSE takes care of flop reduction
    clusters = rewrite(clusters, template, **kwargs)

    # Lift, then fuse again: lifting may create fusion opportunities, which
    # in turn may enable the optimizations that follow
    clusters = fuse(Lift().process(clusters))

    clusters = eliminate_arrays(clusters, template)
    return scalarize(clusters, template)
def _specialize_clusters(cls, clusters, **kwargs):
    """
    Run the Cluster optimization pipeline and return the resulting Clusters.

    The passes are applied in a fixed order: Toposort, fusion, DSE
    rewriting, lifting, a second fusion, array elimination and
    scalarization. ``kwargs`` is forwarded unchanged to ``rewrite``.
    """
    # TODO: for now this duplicates CPU64NoopOperator._specialize_clusters,
    # but the two are expected to diverge

    # Temporaries are named "r<N>", with N drawn from a single counter that
    # is shared by every pass below
    next_index = generator()

    def template():
        return "r%d" % next_index()

    # Toposort comes first because it exposes more fusion opportunities
    clusters = fuse(Toposort().process(clusters))

    # The DSE takes care of flop reduction
    clusters = rewrite(clusters, template, **kwargs)

    # Lift, then fuse again: lifting may create fusion opportunities, which
    # in turn may enable the optimizations that follow
    clusters = fuse(Lift().process(clusters))

    clusters = eliminate_arrays(clusters, template)
    return scalarize(clusters, template)
def _specialize_clusters(cls, clusters, **kwargs):
    """
    Run the Cluster optimization pipeline and return the resulting Clusters.

    ``kwargs`` must provide the ``'options'`` and ``'platform'`` entries (a
    missing one raises ``KeyError``); both are handed to the ``cire``
    passes, and ``options`` also configures the final ``Blocking`` pass.
    """
    options, platform = kwargs['options'], kwargs['platform']

    # Temporaries are named "r<N>", with N drawn from a single counter that
    # is shared by every pass below
    next_index = generator()

    def template():
        return "r%d" % next_index()

    # Toposort comes first because it exposes more fusion opportunities
    clusters = fuse(Toposort().process(clusters))

    # Dimension-invariant sub-expressions get hoisted and optimized
    clusters = cire(clusters, template, 'invariants', options, platform)
    clusters = Lift().process(clusters)

    # Flop reduction, part one: these passes may alter the arithmetic
    clusters = extract_increments(clusters, template)
    clusters = cire(clusters, template, 'sops', options, platform)
    clusters = optimize_pows(factorize(clusters))

    # Flop reduction, part two: the arithmetic is left untouched
    clusters = cse(clusters, template)

    # Fuse again, since the passes above may have created fusion
    # opportunities, which in turn may enable further optimizations
    clusters = fuse(clusters)
    clusters = eliminate_arrays(clusters, template)
    clusters = scalarize(clusters, template)

    # Finally, apply blocking to improve data locality
    return Blocking(options).process(clusters)