def _make_clusters_passes_mapper(cls, **kwargs):
    """
    Build the table associating each Cluster-level pass name with the
    callable that applies it.

    Parameters
    ----------
    cls
        Not used by this body.
    **kwargs
        Must carry the 'options', 'platform' and 'sregistry' entries; a
        missing entry raises KeyError. They are forwarded to the passes
        that need them.

    Returns
    -------
    dict
        Pass name -> callable. The wrappers defined here take the object
        to be processed as their only argument.
    """
    options, platform, sregistry = (
        kwargs['options'], kwargs['platform'], kwargs['sregistry']
    )

    # Handed over to `buffering`; it mimics `is_on_device`, which is what
    # the device backends use
    def time_dims_if_saved(f):
        if not f.is_TimeFunction or f.save is None:
            return None
        return [f.time_dim]

    def run_buffering(clusters):
        return buffering(clusters, time_dims_if_saved, sregistry, options)

    def run_blocking(clusters):
        return blocking(clusters, options)

    def run_lift(clusters):
        # `Lift` is applied on top of the outcome of the 'invariants' CIRE
        hoisted = cire(clusters, 'invariants', sregistry, options, platform)
        return Lift().process(hoisted)

    def run_cire_sops(clusters):
        return cire(clusters, 'sops', sregistry, options, platform)

    def run_cse(clusters):
        return cse(clusters, sregistry)

    def run_topofuse(clusters):
        return fuse(clusters, toposort=True)

    return {
        'buffering': run_buffering,
        'blocking': run_blocking,
        'factorize': factorize,
        'fuse': fuse,
        'lift': run_lift,
        'cire-sops': run_cire_sops,
        'cse': run_cse,
        'opt-pows': optimize_pows,
        'topofuse': run_topofuse,
    }
def _specialize_clusters(cls, clusters, **kwargs):
    """
    Push `clusters` through a fixed sequence of optimization passes and
    return the outcome of the last one.

    Parameters
    ----------
    cls
        Not used by this body.
    clusters
        The object fed to the first pass; each pass receives the output
        of the previous one.
    **kwargs
        Must carry the 'options', 'platform' and 'sregistry' entries; a
        missing entry raises KeyError.

    Returns
    -------
    The value returned by the final (blocking) pass.
    """
    options, platform, sregistry = (
        kwargs['options'], kwargs['platform'], kwargs['sregistry']
    )

    # The passes, in application order. Each step is wrapped in a lambda so
    # that nothing is evaluated until the step is actually reached
    pipeline = (
        # Toposort+Fusion (the former to expose more fusion opportunities)
        lambda c: fuse(c, toposort=True),

        # Hoist and optimize Dimension-invariant sub-expressions
        lambda c: cire(c, 'invariants', sregistry, options, platform),
        lambda c: Lift().process(c),

        # Reduce flops (potential arithmetic alterations)
        lambda c: extract_increments(c, sregistry),
        lambda c: cire(c, 'sops', sregistry, options, platform),
        lambda c: factorize(c),
        lambda c: optimize_pows(c),

        # The previous passes may have created fusion opportunities
        lambda c: fuse(c),

        # Reduce flops (no arithmetic alterations)
        lambda c: cse(c, sregistry),

        # Blocking to improve data locality
        lambda c: blocking(c, options),
    )

    for apply_pass in pipeline:
        clusters = apply_pass(clusters)

    return clusters
def _make_clusters_passes_mapper(cls, **kwargs):
    """
    Build the table associating each Cluster-level pass name with the
    callable that applies it, including the 'tasking' and 'streaming'
    passes.

    Parameters
    ----------
    cls
        Not used by this body.
    **kwargs
        Must carry the 'options', 'platform' and 'sregistry' entries; a
        missing entry raises KeyError.

    Returns
    -------
    dict
        Pass name -> callable. The wrappers defined here take the object
        to be processed as their only argument.
    """
    options, platform, sregistry = (
        kwargs['options'], kwargs['platform'], kwargs['sregistry']
    )

    # `make_callbacks` yields the two callbacks that `Tasker` and `Streaming`
    # are respectively constructed with
    runs_on_host, reads_if_on_host = make_callbacks(options)
    tasking = Tasker(runs_on_host).process
    streaming = Streaming(reads_if_on_host).process

    def run_blocking(clusters):
        return blocking(clusters, options)

    def run_lift(clusters):
        # `Lift` is applied on top of the outcome of the 'invariants' CIRE
        hoisted = cire(clusters, 'invariants', sregistry, options, platform)
        return Lift().process(hoisted)

    def run_cire_sops(clusters):
        return cire(clusters, 'sops', sregistry, options, platform)

    def run_cse(clusters):
        return cse(clusters, sregistry)

    def run_topofuse(clusters):
        return fuse(clusters, toposort=True)

    return {
        'blocking': run_blocking,
        'tasking': tasking,
        'streaming': streaming,
        'factorize': factorize,
        'fuse': fuse,
        'lift': run_lift,
        'cire-sops': run_cire_sops,
        'cse': run_cse,
        'opt-pows': optimize_pows,
        'topofuse': run_topofuse,
    }
def _make_clusters_passes_mapper(cls, **kwargs):
    """
    Build the table associating each Cluster-level pass name with the
    callable that applies it, including the 'buffering', 'tasking',
    'streaming' and 'fission' passes.

    Parameters
    ----------
    cls
        Not used by this body.
    **kwargs
        Must carry the 'options', 'platform' and 'sregistry' entries; a
        missing entry raises KeyError. `options['gpu-fit']` is read lazily,
        when the buffering callback gets invoked.

    Returns
    -------
    dict
        Pass name -> callable. The wrappers defined here take the object
        to be processed as their only argument.
    """
    options, platform, sregistry = (
        kwargs['options'], kwargs['platform'], kwargs['sregistry']
    )

    # Callbacks consumed by `Tasker` and `Streaming`
    runs_on_host, reads_if_on_host = make_callbacks(options)
    tasking = Tasker(runs_on_host).process
    streaming = Streaming(reads_if_on_host).process

    # Callback consumed by `buffering`: the time Dimension is returned only
    # for what `is_on_device` reports as not being on the device
    def time_dims_if_off_device(f):
        return None if is_on_device(f, options['gpu-fit']) else [f.time_dim]

    def run_buffering(clusters):
        return buffering(clusters, time_dims_if_off_device, sregistry, options)

    def run_blocking(clusters):
        return blocking(clusters, sregistry, options)

    def run_fuse(clusters):
        return fuse(clusters, options=options)

    def run_lift(clusters):
        # `Lift` is applied on top of the outcome of the 'invariants' CIRE
        hoisted = cire(clusters, 'invariants', sregistry, options, platform)
        return Lift().process(hoisted)

    def run_cire_sops(clusters):
        return cire(clusters, 'sops', sregistry, options, platform)

    def run_cse(clusters):
        return cse(clusters, sregistry)

    def run_topofuse(clusters):
        return fuse(clusters, toposort=True, options=options)

    return {
        'buffering': run_buffering,
        'blocking': run_blocking,
        'tasking': tasking,
        'streaming': streaming,
        'factorize': factorize,
        'fission': fission,
        'fuse': run_fuse,
        'lift': run_lift,
        'cire-sops': run_cire_sops,
        'cse': run_cse,
        'opt-pows': optimize_pows,
        'topofuse': run_topofuse,
    }
def _specialize_clusters(cls, clusters, **kwargs):
    """
    Push `clusters` through a fixed sequence of optimization passes and
    return the outcome of the last one.

    Blocking is attempted at two points of the sequence: right after lifting
    if `options['blockeager']` is truthy, and at the very end if
    `options['blocklazy']` is truthy.

    Parameters
    ----------
    cls
        Not used by this body.
    clusters
        The object fed to the first pass; each pass receives the output
        of the previous one.
    **kwargs
        Must carry the 'options', 'platform' and 'sregistry' entries; a
        missing entry raises KeyError.

    Returns
    -------
    The value produced by the last step of the sequence.
    """
    options, platform, sregistry = (
        kwargs['options'], kwargs['platform'], kwargs['sregistry']
    )

    def block_if(flag, c):
        # Blocking to define thread blocks, gated by `options[flag]`; the
        # option is only looked up once this step is reached
        return blocking(c, sregistry, options) if options[flag] else c

    # The passes, in application order. Each step is wrapped in a lambda so
    # that nothing is evaluated until the step is actually reached
    pipeline = (
        # Optimize MultiSubDomains
        lambda c: optimize_msds(c),

        # Toposort+Fusion (the former to expose more fusion opportunities)
        lambda c: fuse(c, toposort=True, options=options),

        # Fission to increase parallelism
        lambda c: fission(c),

        # Hoist and optimize Dimension-invariant sub-expressions
        lambda c: cire(c, 'invariants', sregistry, options, platform),
        lambda c: Lift().process(c),

        # Eager blocking
        lambda c: block_if('blockeager', c),

        # Reduce flops
        lambda c: extract_increments(c, sregistry),
        lambda c: cire(c, 'sops', sregistry, options, platform),
        lambda c: factorize(c),
        lambda c: optimize_pows(c),

        # The previous passes may have created fusion opportunities
        # NOTE(review): unlike the first fusion step, `options` is not
        # forwarded here -- confirm this is intentional
        lambda c: fuse(c),

        # Reduce flops
        lambda c: cse(c, sregistry),

        # Lazy blocking
        lambda c: block_if('blocklazy', c),
    )

    for apply_pass in pipeline:
        clusters = apply_pass(clusters)

    return clusters
def _make_clusters_passes_mapper(cls, **kwargs):
    """
    Build the table associating each Cluster-level pass name with the
    callable that applies it.

    Parameters
    ----------
    cls
        Not used by this body.
    **kwargs
        Must carry the 'options', 'platform' and 'sregistry' entries; a
        missing entry raises KeyError.

    Returns
    -------
    dict
        Pass name -> callable. The wrappers defined here take the object
        to be processed as their only argument.
    """
    options, platform, sregistry = (
        kwargs['options'], kwargs['platform'], kwargs['sregistry']
    )

    def run_blocking(clusters):
        return blocking(clusters, options)

    def run_lift(clusters):
        # `Lift` is applied on top of the outcome of the 'invariants' CIRE
        hoisted = cire(clusters, 'invariants', sregistry, options, platform)
        return Lift().process(hoisted)

    def run_cire_sops(clusters):
        return cire(clusters, 'sops', sregistry, options, platform)

    def run_cse(clusters):
        return cse(clusters, sregistry)

    def run_topofuse(clusters):
        return fuse(clusters, toposort=True)

    return {
        'blocking': run_blocking,
        'factorize': factorize,
        'fuse': fuse,
        'lift': run_lift,
        'cire-sops': run_cire_sops,
        'cse': run_cse,
        'opt-pows': optimize_pows,
        'topofuse': run_topofuse,
    }