def invoke_stencil(tile_size, offset=False, unroll=False, view=False):
    """Check that stencil tiling and subgraph fusion preserve the program output.

    The selected program is compiled and executed three times on the same
    random input: untransformed (baseline), after ``StencilTiling``, and after
    an additional ``SubgraphFusion`` with consolidation enabled. The tiled and
    fused outputs are compared against the baseline with ``np.allclose``.

    Args:
        tile_size: Tile size handed to ``StencilTiling`` as a 1-tuple.
        offset: If true, use ``stencil_offset`` instead of ``stencil``.
        unroll: If true, set ``unroll_loops`` on the tiling transformation.
        view: If true, call ``sdfg.view()`` before and after tiling.

    Raises:
        AssertionError: If a transformation reports that it cannot be applied,
            or if a transformed output differs from the baseline.
    """
    size = N.get()
    A = np.random.rand(size).astype(np.float64)
    B1 = np.zeros(size, dtype=np.float64)  # baseline output
    B2 = np.zeros(size, dtype=np.float64)  # output after tiling
    B3 = np.zeros(size, dtype=np.float64)  # output after tiling + fusion

    program = stencil_offset if offset else stencil
    sdfg = program.to_sdfg()
    sdfg.simplify()
    graph = sdfg.nodes()[0]

    if view:
        sdfg.view()

    # baseline
    sdfg.name = 'baseline'
    csdfg = sdfg.compile()
    csdfg(A=A, B=B1, N=N)
    # Drop the compiled object before the SDFG is transformed and recompiled.
    del csdfg

    # Tile every node of the first state.
    subgraph = SubgraphView(graph, list(graph.nodes()))
    st = StencilTiling()
    st.setup_match(subgraph)
    st.tile_size = (tile_size, )
    st.schedule = dace.dtypes.ScheduleType.Sequential
    assert st.can_be_applied(sdfg, subgraph)
    # Only override unroll_loops when requested; otherwise the
    # transformation's own default stays in effect.
    if unroll:
        st.unroll_loops = True
    st.apply(sdfg)

    if view:
        sdfg.view()

    sdfg.name = 'tiled'
    sdfg.validate()
    csdfg = sdfg.compile()
    csdfg(A=A, B=B2, N=N)
    del csdfg
    assert np.allclose(B1, B2)

    # Fuse the tiled state; the node list is rebuilt because tiling and
    # simplification changed the graph contents.
    sdfg.simplify()
    subgraph = SubgraphView(graph, list(graph.nodes()))
    sf = SubgraphFusion()
    sf.setup_match(subgraph)
    assert sf.can_be_applied(sdfg, subgraph)
    # also test consolidation
    sf.consolidate = True
    sf.apply(sdfg)

    sdfg.name = 'fused'
    csdfg = sdfg.compile()
    csdfg(A=A, B=B3, N=N)
    del csdfg

    print(np.linalg.norm(B1))
    print(np.linalg.norm(B3))
    # B1 vs. B2 was already verified right after the tiled run.
    assert np.allclose(B1, B3)
    print("PASS")
def invoke_stencil(tile_size, offset=False, unroll=False):
    """Check that stencil tiling and subgraph fusion preserve the program output.

    The selected program is compiled and executed three times on the same
    random input: untransformed (baseline), after ``StencilTiling``, and after
    an additional ``SubgraphFusion``. Each stage gets an SDFG name that encodes
    the arguments, so different invocations get distinct names. The tiled and
    fused outputs are compared against the baseline with ``np.allclose``.

    Args:
        tile_size: Tile size handed to ``StencilTiling`` as a 1-tuple.
        offset: If true, use ``stencil_offset`` instead of ``stencil``.
        unroll: Value assigned to ``unroll_loops`` on the tiling transformation.

    Raises:
        AssertionError: If a transformation reports that it cannot be applied,
            or if a transformed output differs from the baseline.
    """
    size = N.get()
    # The input holds twice as many elements as each output array.
    A = np.random.rand(size * 2).astype(np.float64)
    B1 = np.zeros(size, dtype=np.float64)  # baseline output
    B2 = np.zeros(size, dtype=np.float64)  # output after tiling
    B3 = np.zeros(size, dtype=np.float64)  # output after tiling + fusion

    program = stencil_offset if offset else stencil
    sdfg = program.to_sdfg()
    sdfg.simplify()
    graph = sdfg.nodes()[0]

    suffix = f'{tile_size}_{offset}_{unroll}'

    # baseline
    sdfg.name = f'baseline_{suffix}'
    csdfg = sdfg.compile()
    csdfg(A=A, B=B1, N=N)
    # Drop the compiled object before the SDFG is transformed and recompiled.
    del csdfg

    # Tile every node of the first state.
    subgraph = SubgraphView(graph, list(graph.nodes()))
    st = StencilTiling()
    st.setup_match(subgraph)
    st.tile_size = (tile_size, )
    st.unroll_loops = unroll
    assert st.can_be_applied(sdfg, subgraph)
    # change schedule so that OMP never fails
    st.schedule = dace.dtypes.ScheduleType.Sequential
    st.apply(sdfg)

    sdfg.name = f'tiled_{suffix}'
    csdfg = sdfg.compile()
    csdfg(A=A, B=B2, N=N)
    del csdfg
    # Without this check the function would report PASS for a wrong result.
    assert np.allclose(B1, B2)

    # Fuse the tiled state; the node list is rebuilt because tiling and
    # simplification changed the graph contents.
    sdfg.simplify()
    subgraph = SubgraphView(graph, list(graph.nodes()))
    sf = SubgraphFusion()
    sf.setup_match(subgraph)
    assert sf.can_be_applied(sdfg, subgraph)
    sf.apply(sdfg)

    sdfg.name = f'fused_{suffix}'
    csdfg = sdfg.compile()
    csdfg(A=A, B=B3, N=N)
    del csdfg

    # Print the norms first so they are visible if the comparison fails.
    print(np.linalg.norm(B1))
    print(np.linalg.norm(B3))
    assert np.allclose(B1, B3)
    print("PASS")