def _specialize_clusters(cls, clusters, **kwargs):
    """
    Run the Cluster optimization passes in sequence and return the result.

    ``kwargs`` must carry the 'options' and 'platform' entries, which are
    forwarded to the `cire` passes.
    """
    options, platform = kwargs['options'], kwargs['platform']

    # Factory of names for the temporaries: every call formats the value
    # returned by `counter()` as "r%d"
    counter = generator()

    def template():
        return "r%d" % counter()

    # Fusion preceded by a topological sort, as the latter exposes more
    # fusion opportunities
    clusters = fuse(clusters, toposort=True)

    # Dimension-invariant sub-expressions: hoist and optimize
    clusters = cire(clusters, template, 'invariants', options, platform)
    lifter = Lift()
    clusters = lifter.process(clusters)

    # Flop reduction -- these passes may alter the arithmetic
    clusters = extract_increments(clusters, template)
    clusters = cire(clusters, template, 'sops', options, platform)
    clusters = factorize(clusters)
    clusters = optimize_pows(clusters)

    # Flop reduction -- no arithmetic alterations here
    clusters = cse(clusters, template)

    # Lifting may have created fusion opportunities, which in turn may
    # enable further optimizations
    clusters = fuse(clusters)
    clusters = eliminate_arrays(clusters, template)

    return clusters
def _specialize_clusters(cls, clusters, **kwargs):
    """
    Apply the Cluster optimization passes, one after the other, and return
    the resulting clusters.

    The 'options', 'platform' and 'sregistry' entries of ``kwargs`` are
    required; they are handed over to the individual passes.
    """
    options, platform, sregistry = (
        kwargs['options'], kwargs['platform'], kwargs['sregistry']
    )

    # Topological sort, then fusion (sorting exposes more fusion
    # opportunities)
    clusters = fuse(clusters, toposort=True)

    # Dimension-invariant sub-expressions: hoist and optimize
    clusters = cire(clusters, 'invariants', sregistry, options, platform)
    lifter = Lift()
    clusters = lifter.process(clusters)

    # Flop reduction -- these passes may alter the arithmetic
    clusters = extract_increments(clusters, sregistry)
    clusters = cire(clusters, 'sops', sregistry, options, platform)
    clusters = factorize(clusters)
    clusters = optimize_pows(clusters)

    # The passes above may have created fusion opportunities, which in
    # turn may enable further optimizations
    clusters = fuse(clusters)
    clusters = eliminate_arrays(clusters)

    # Flop reduction -- no arithmetic alterations here
    clusters = cse(clusters, sregistry)

    # Blocking, to improve data locality
    blocker = Blocking(options)
    clusters = blocker.process(clusters)

    return clusters
def _make_clusters_passes_mapper(cls, **kwargs):
    """
    Build and return a dict mapping pass names to the callables that
    implement them.

    ``kwargs`` must carry the 'options', 'platform' and 'sregistry' entries.
    """
    options = kwargs['options']
    platform = kwargs['platform']
    sregistry = kwargs['sregistry']

    # Callbacks handed to `Tasker` and `Streaming` respectively
    callbacks = make_callbacks(options)
    runs_on_host, reads_if_on_host = callbacks

    def lift(clusters):
        # Invariants are extracted via `cire` and the outcome is then lifted
        return Lift().process(
            cire(clusters, 'invariants', sregistry, options, platform))

    def cire_sops(clusters):
        return cire(clusters, 'sops', sregistry, options, platform)

    def run_cse(clusters):
        return cse(clusters, sregistry)

    def topofuse(clusters):
        return fuse(clusters, toposort=True)

    return {
        'blocking': Blocking(options).process,
        'tasking': Tasker(runs_on_host).process,
        'streaming': Streaming(reads_if_on_host).process,
        'factorize': factorize,
        'fuse': fuse,
        'lift': lift,
        'cire-sops': cire_sops,
        'cse': run_cse,
        'opt-pows': optimize_pows,
        'topofuse': topofuse
    }
def _make_clusters_passes_mapper(cls, **kwargs):
    """
    Build and return a dict mapping pass names to the callables that
    implement them.

    ``kwargs`` must carry the 'options', 'platform' and 'sregistry' entries.
    """
    options, platform, sregistry = (
        kwargs['options'], kwargs['platform'], kwargs['sregistry']
    )

    mapper = {}

    mapper['blocking'] = Blocking(options).process
    mapper['factorize'] = factorize
    mapper['fuse'] = fuse

    # The `cire`-based passes only differ in the mode string; 'lift'
    # additionally lifts the outcome of the 'invariants' mode
    mapper['lift'] = lambda clusters: Lift().process(
        cire(clusters, 'invariants', sregistry, options, platform))
    mapper['cire-sops'] = lambda clusters: cire(
        clusters, 'sops', sregistry, options, platform)
    mapper['cire-divs'] = lambda clusters: cire(
        clusters, 'divs', sregistry, options, platform)

    mapper['cse'] = lambda clusters: cse(clusters, sregistry)
    mapper['opt-pows'] = optimize_pows
    mapper['topofuse'] = lambda clusters: fuse(clusters, toposort=True)

    return mapper
def _specialize_clusters(cls, clusters, **kwargs):
    """
    Feed the clusters through the optimization passes, in order, and return
    the outcome.

    ``kwargs`` must carry the 'options', 'platform' and 'sregistry' entries.
    """
    options = kwargs['options']
    platform = kwargs['platform']
    sregistry = kwargs['sregistry']

    pipeline = (
        # Toposort+Fusion (the former to expose more fusion opportunities)
        lambda c: fuse(c, toposort=True),

        # Hoist and optimize Dimension-invariant sub-expressions
        lambda c: cire(c, 'invariants', sregistry, options, platform),
        lambda c: Lift().process(c),

        # Reduce flops
        lambda c: extract_increments(c, sregistry),
        lambda c: cire(c, 'sops', sregistry, options, platform),
        factorize,
        optimize_pows,

        # The previous passes may have created fusion opportunities
        fuse,

        # Reduce flops
        lambda c: cse(c, sregistry),
    )

    # Each pass consumes the output of the one before it
    for run_pass in pipeline:
        clusters = run_pass(clusters)

    return clusters
def _specialize_clusters(cls, clusters, **kwargs):
    """
    Feed the clusters through the optimization passes, in order, and return
    the outcome. All of ``kwargs`` is forwarded to `rewrite`.
    """
    # TODO: this is currently identical to
    # CPU64NoopOperator._specialize_clusters, but it will have to change

    # Factory of names for the temporaries: every call formats the value
    # returned by `counter()` as "r%d"
    counter = generator()

    def template():
        return "r%d" % counter()

    pipeline = (
        # Toposort+Fusion (the former to expose more fusion opportunities)
        lambda c: Toposort().process(c),
        fuse,

        # Flop reduction via the DSE
        lambda c: rewrite(c, template, **kwargs),

        # Lifting
        lambda c: Lift().process(c),

        # Lifting may create fusion opportunities, which in turn may
        # enable further optimizations
        fuse,
        lambda c: eliminate_arrays(c, template),
        lambda c: scalarize(c, template),
    )

    # Each pass consumes the output of the one before it
    for run_pass in pipeline:
        clusters = run_pass(clusters)

    return clusters
def _make_clusters_passes_mapper(cls, **kwargs):
    """
    Build and return a dict mapping pass names to the callables that
    implement them.

    ``kwargs`` must carry the 'options', 'platform' and 'sregistry' entries.
    """
    options, platform, sregistry = (
        kwargs['options'], kwargs['platform'], kwargs['sregistry']
    )

    def callback(f):
        # Used by `buffering`; it mimics `is_on_device`, which is used on
        # device backends. Yields `[f.time_dim]` for a TimeFunction whose
        # `save` is set, None for anything else
        return [f.time_dim] if f.is_TimeFunction and f.save is not None else None

    entries = [
        ('buffering', lambda clusters: buffering(clusters, callback, sregistry,
                                                 options)),
        ('blocking', lambda clusters: blocking(clusters, options)),
        ('factorize', factorize),
        ('fuse', fuse),
        ('lift', lambda clusters: Lift().process(
            cire(clusters, 'invariants', sregistry, options, platform))),
        ('cire-sops', lambda clusters: cire(clusters, 'sops', sregistry,
                                            options, platform)),
        ('cse', lambda clusters: cse(clusters, sregistry)),
        ('opt-pows', optimize_pows),
        ('topofuse', lambda clusters: fuse(clusters, toposort=True)),
    ]

    return dict(entries)
def _specialize_clusters(cls, clusters, **kwargs):
    """
    Optimize Clusters for better runtime performance.

    The passes run in a fixed sequence, each one consuming the output of
    the previous one. All of ``kwargs`` is forwarded to `rewrite`.
    """
    # Factory of names for the temporaries: every call formats the value
    # returned by `counter()` as "r%d"
    counter = generator()

    def template():
        return "r%d" % counter()

    # Topological sort, then fusion (sorting exposes more fusion
    # opportunities)
    ordered = Toposort().process(clusters)
    fused = fuse(ordered)

    # Flop reduction via the DSE
    rewritten = rewrite(fused, template, **kwargs)

    # Lifting
    lifted = Lift().process(rewritten)

    # Lifting may create fusion opportunities, which in turn may enable
    # further optimizations
    refused = fuse(lifted)
    without_arrays = eliminate_arrays(refused, template)
    scalarized = scalarize(without_arrays, template)

    return scalarized
def _make_clusters_passes_mapper(cls, **kwargs):
    """
    Build and return a dict mapping pass names to the callables that
    implement them.

    ``kwargs`` must carry the 'options', 'platform' and 'sregistry' entries.
    """
    options, platform, sregistry = (
        kwargs['options'], kwargs['platform'], kwargs['sregistry']
    )

    # Callbacks used by `Tasking` and `Streaming`
    callbacks = make_callbacks(options)
    runs_on_host, reads_if_on_host = callbacks

    def buffering_callback(f):
        # Callback used by `buffering`: None if `is_on_device` holds for
        # `f` (given the 'gpu-fit' option), `[f.time_dim]` otherwise
        if is_on_device(f, options['gpu-fit']):
            return None
        return [f.time_dim]

    return {
        'buffering': lambda clusters: buffering(clusters, buffering_callback,
                                                sregistry, options),
        'blocking': lambda clusters: blocking(clusters, sregistry, options),
        'tasking': Tasker(runs_on_host).process,
        'streaming': Streaming(reads_if_on_host).process,
        'factorize': factorize,
        'fission': fission,
        'fuse': lambda clusters: fuse(clusters, options=options),
        'lift': lambda clusters: Lift().process(
            cire(clusters, 'invariants', sregistry, options, platform)),
        'cire-sops': lambda clusters: cire(clusters, 'sops', sregistry,
                                           options, platform),
        'cse': lambda clusters: cse(clusters, sregistry),
        'opt-pows': optimize_pows,
        'topofuse': lambda clusters: fuse(clusters, toposort=True,
                                          options=options)
    }
def _specialize_clusters(cls, clusters, **kwargs):
    """
    Optimize Clusters for better runtime performance.

    The passes run in a fixed sequence, each one consuming the output of
    the previous one. ``kwargs`` must carry the 'options' and 'platform'
    entries.
    """
    options, platform = kwargs['options'], kwargs['platform']

    # Factory of names for the temporaries: every call formats the value
    # returned by `counter()` as "r%d"
    counter = generator()

    def template():
        return "r%d" % counter()

    pipeline = (
        # Toposort+Fusion (the former to expose more fusion opportunities)
        lambda c: Toposort().process(c),
        fuse,

        # Hoist and optimize Dimension-invariant sub-expressions
        lambda c: cire(c, template, 'invariants', options, platform),
        lambda c: Lift().process(c),

        # Blocking to improve data locality
        lambda c: Blocking(options).process(c),

        # Reduce flops (potential arithmetic alterations)
        lambda c: extract_increments(c, template),
        lambda c: cire(c, template, 'sops', options, platform),
        factorize,
        optimize_pows,
        freeze,

        # Reduce flops (no arithmetic alterations)
        lambda c: cse(c, template),

        # The previous passes may have created fusion opportunities, which
        # in turn may enable further optimizations
        fuse,
        lambda c: eliminate_arrays(c, template),
        lambda c: scalarize(c, template),
    )

    for run_pass in pipeline:
        clusters = run_pass(clusters)

    return clusters
def _specialize_clusters(cls, clusters, **kwargs):
    """
    Apply the Cluster optimization passes, one after the other, and return
    the resulting clusters.

    ``kwargs`` must carry the 'options', 'platform' and 'sregistry' entries.
    Blocking is applied at most at two points of the sequence, depending on
    the 'blockeager' and 'blocklazy' options.
    """
    options, platform, sregistry = (
        kwargs['options'], kwargs['platform'], kwargs['sregistry']
    )

    # MultiSubDomains optimization comes first
    clusters = optimize_msds(clusters)

    # Topological sort, then fusion (sorting exposes more fusion
    # opportunities)
    clusters = fuse(clusters, toposort=True, options=options)

    # Fission, to increase parallelism
    clusters = fission(clusters)

    # Dimension-invariant sub-expressions: hoist and optimize
    clusters = cire(clusters, 'invariants', sregistry, options, platform)
    lifter = Lift()
    clusters = lifter.process(clusters)

    # Eager blocking, to define thread blocks
    eager = options['blockeager']
    if eager:
        clusters = blocking(clusters, sregistry, options)

    # Flop reduction
    clusters = extract_increments(clusters, sregistry)
    clusters = cire(clusters, 'sops', sregistry, options, platform)
    clusters = factorize(clusters)
    clusters = optimize_pows(clusters)

    # The passes above may have created fusion opportunities
    clusters = fuse(clusters)

    # Flop reduction
    clusters = cse(clusters, sregistry)

    # Lazy blocking, to define thread blocks
    lazy = options['blocklazy']
    if lazy:
        clusters = blocking(clusters, sregistry, options)

    return clusters