def _lower_iet(cls, stree, profiler, **kwargs):
    """
    Lower a ScheduleTree into a target-specific Iteration/Expression tree.

    The lowering goes through the following stages:

        * Turn the ScheduleTree into an Iteration/Expression tree (IET);
        * Introduce distributed-memory, shared-memory, and SIMD parallelism;
        * Introduce optimizations for data locality;
        * Finalize (e.g., symbol definitions, array casts).

    Everything past the first stage is delegated to ``cls._specialize_iet``.

    Parameters
    ----------
    stree
        The ScheduleTree to be lowered.
    profiler
        Object that first analyzes the freshly built IET and is later handed
        over to ``instrument`` for C-level profiling.
    **kwargs
        Forwarded untouched to ``cls._specialize_iet``. The ``sregistry``
        entry is mandatory; ``name`` is optional and defaults to "Kernel".

    Returns
    -------
    tuple
        The root of the specialized graph, followed by the graph itself.

    Raises
    ------
    KeyError
        If ``sregistry`` is missing from ``kwargs``.
    """
    entry_name = kwargs.get("name", "Kernel")
    sregistry = kwargs['sregistry']

    # ScheduleTree -> IET
    body = iet_build(stree)

    # Let the profiler inspect the IET Sections (C-level profiling)
    profiler.analyze(body)

    # The IET gets wrapped into an EntryFunction, that is the special Callable
    # acting as the entry point of the generated library
    entry = EntryFunction(entry_name, body, 'int', derive_parameters(body, True), ())

    # Generic IET -> target-specific IET
    graph = cls._specialize_iet(Graph(entry), **kwargs)

    # The instrumentation for C-level profiling is deliberately carried out
    # only now: the specialization above may have introduced further Sections
    instrument(graph, profiler=profiler, sregistry=sregistry)

    return graph.root, graph
def test_strides_forwarding1():
    """
    Check where `linearize` places the stride expressions when an Array is
    forwarded, via `.indexed`, from a caller (`foo`) to a callee (`bar`).
    """
    grid = Grid(shape=(4, 4))
    arr = Array(name='a', dimensions=grid.dimensions, shape=grid.shape)

    # `bar` is the only routine actually accessing `a`; `foo` merely calls it
    callee = Callable('bar', DummyExpr(arr[0, 0], 0), 'void',
                      parameters=[arr.indexed])
    caller = Callable('foo', Call(callee.name, [arr.indexed]), 'void',
                      parameters=[arr])

    # Mimic the state the compiler would have produced
    graph = Graph(caller)
    graph.efuncs['bar'] = callee

    linearize(graph, mode=True, sregistry=SymbolRegistry())

    # Although `a` is passed via `a.indexed`, it is an Array (which has a
    # symbolic shape), so we expect the stride exprs to be placed in `bar`,
    # and in `bar` only: `foo` doesn't really use `a`, it just propagates it
    # down to `bar`
    foo = graph.root
    bar = graph.efuncs['bar']

    foo_body = foo.body.body
    assert len(foo_body) == 1
    assert foo_body[0].is_Call

    bar_body = bar.body.body
    assert len(bar_body) == 5
    assert bar_body[0].write.name == 'y_fsz0'
    assert bar_body[2].write.name == 'y_stride0'
def _lower_iet(cls, stree, profiler, **kwargs):
    """
    Lower a ScheduleTree into a target-specific Iteration/Expression tree.

    The lowering goes through the following stages:

        * Turn the ScheduleTree into an Iteration/Expression tree (IET);
        * Perform analysis to detect optimization opportunities;
        * Introduce distributed-memory, shared-memory, and SIMD parallelism;
        * Introduce optimizations for data locality;
        * Finalize (e.g., symbol definitions, array casts).

    Everything past the first stage is delegated to ``cls._specialize_iet``.

    Parameters
    ----------
    stree
        The ScheduleTree to be lowered.
    profiler
        Object whose ``instrument`` method receives the freshly built IET and
        returns the IET to be used from then on (C-level profiling).
    **kwargs
        Forwarded untouched to ``cls._specialize_iet``. The optional ``name``
        entry, defaulting to "Kernel", names the generated Callable.

    Returns
    -------
    tuple
        The root of the specialized graph, followed by the graph itself.
    """
    kernel_name = kwargs.get("name", "Kernel")

    # ScheduleTree -> IET, straight away instrumented for C-level profiling
    body = profiler.instrument(iet_build(stree))

    # The instrumented IET becomes the body of a Callable
    kernel = Callable(kernel_name, body, 'int', derive_parameters(body, True), ())

    # Generic IET -> target-specific IET
    graph = cls._specialize_iet(Graph(kernel), **kwargs)

    return graph.root, graph
def test_strides_forwarding0():
    """
    Check where `linearize` places the stride expressions when a Function is
    forwarded, via `.indexed`, from a caller (`foo`) to a callee (`bar`).
    """
    grid = Grid(shape=(4, 4))
    func = Function(name='f', grid=grid)

    # `bar` is the routine accessing `f`; `foo` merely calls it
    callee = Callable('bar', DummyExpr(func[0, 0], 0), 'void',
                      parameters=[func.indexed])
    caller = Callable('foo', Call(callee.name, [func.indexed]), 'void',
                      parameters=[func])

    # Mimic the state the compiler would have produced
    graph = Graph(caller)
    graph.efuncs['bar'] = callee

    linearize(graph, mode=True, sregistry=SymbolRegistry())

    # `f` is passed via `f.indexed`, hence we expect the stride exprs to be
    # lifted in `foo` and then handed down to `bar` as arguments
    foo = graph.root
    bar = graph.efuncs['bar']

    foo_body = foo.body.body
    assert foo_body[0].write.name == 'y_fsz0'
    assert foo_body[2].write.name == 'y_stride0'
    assert len(foo_body[4].arguments) == 2

    bar_params = bar.parameters
    assert len(bar_params) == 2
    assert bar_params[1].name == 'y_stride0'
    assert len(bar.body.body) == 1