def invoke_stencil(tile_size, offset=False, unroll=False, view=False):
    """Run the stencil SDFG untransformed, tiled, and fused, checking the outputs.

    The SDFG is compiled and executed three times on the same random input:
    as-is (named 'baseline'), after ``StencilTiling`` (named 'tiled'), and
    after ``SubgraphFusion`` with consolidation enabled (named 'fused').

    Args:
        tile_size: Tile size handed to ``StencilTiling`` as a 1-tuple.
        offset: If true, build the SDFG from ``stencil_offset`` instead of
            ``stencil``.
        unroll: If true, set ``unroll_loops`` on the tiling transformation.
        view: If true, call ``sdfg.view()`` before the baseline run and again
            after tiling has been applied.

    Raises:
        AssertionError: If a transformation reports it cannot be applied, or
            if the tiled or fused output is not ``np.allclose`` to the
            baseline output.
    """
    length = N.get()
    source = np.random.rand(length).astype(np.float64)
    out_baseline = np.zeros(length, dtype=np.float64)
    out_tiled = np.zeros(length, dtype=np.float64)
    out_fused = np.zeros(length, dtype=np.float64)

    program = stencil_offset if offset else stencil
    sdfg = program.to_sdfg()
    sdfg.apply_strict_transformations()
    # The first state is looked up once and reused for both subgraph views.
    state = sdfg.nodes()[0]

    def execute(output):
        # Compile the SDFG in its current form and run it into `output`; the
        # compiled object is released when this helper returns.
        compiled = sdfg.compile()
        compiled(A=source, B=output, N=N)

    if view:
        sdfg.view()

    # Reference run on the untransformed SDFG.
    sdfg.name = 'baseline'
    execute(out_baseline)

    # Tile every node of the state.
    region = SubgraphView(state, list(state.nodes()))
    tiling = StencilTiling(region)
    tiling.tile_size = (tile_size, )
    tiling.schedule = dace.dtypes.ScheduleType.Sequential
    assert tiling.can_be_applied(sdfg, region)
    if unroll:
        tiling.unroll_loops = True
    tiling.apply(sdfg)
    if view:
        sdfg.view()

    sdfg.name = 'tiled'
    sdfg.validate()
    execute(out_tiled)
    assert np.allclose(out_baseline, out_tiled)

    # Fuse the tiled result.
    sdfg.apply_strict_transformations()
    region = SubgraphView(state, list(state.nodes()))
    fusion = SubgraphFusion(region)
    assert fusion.can_be_applied(sdfg, region)
    # also test consolidation
    fusion.consolidate = True
    fusion.apply(sdfg)

    sdfg.name = 'fused'
    execute(out_fused)

    print(np.linalg.norm(out_baseline))
    print(np.linalg.norm(out_fused))
    assert np.allclose(out_baseline, out_tiled)
    assert np.allclose(out_baseline, out_fused)
    print("PASS")
def invoke_stencil(tile_size, offset=False, unroll=False):
    """Compile and run the stencil SDFG untransformed, tiled, and fused.

    The same SDFG object is executed three times on one random input: as-is,
    after ``StencilTiling``, and after ``SubgraphFusion``. Each run gets a
    name that encodes the arguments (``baseline_…``, ``tiled_…``,
    ``fused_…``). The norms of the baseline and fused outputs are printed.

    NOTE(review): the three output buffers are only printed, never compared
    against each other here — confirm whether that is intentional.

    Args:
        tile_size: Tile size handed to ``StencilTiling`` as a 1-tuple.
        offset: If true, build the SDFG from ``stencil_offset`` instead of
            ``stencil``.
        unroll: Value assigned to ``unroll_loops`` on the tiling
            transformation.

    Raises:
        AssertionError: If either transformation reports that it cannot be
            applied to the subgraph.
    """
    length = N.get()
    # The input holds twice as many elements as each output buffer.
    source = np.random.rand(length * 2).astype(np.float64)
    out_baseline = np.zeros(length, dtype=np.float64)
    out_tiled = np.zeros(length, dtype=np.float64)
    out_fused = np.zeros(length, dtype=np.float64)

    program = stencil_offset if offset else stencil
    sdfg = program.to_sdfg()
    sdfg.simplify()
    # The first state is looked up once and reused for both subgraph views.
    state = sdfg.nodes()[0]

    def execute(output):
        # Compile the SDFG in its current form and run it into `output`; the
        # compiled object is released when this helper returns.
        compiled = sdfg.compile()
        compiled(A=source, B=output, N=N)

    # Reference run on the untransformed SDFG.
    sdfg.name = f'baseline_{tile_size}_{offset}_{unroll}'
    execute(out_baseline)

    # Tile every node of the state.
    region = SubgraphView(state, list(state.nodes()))
    tiling = StencilTiling()
    tiling.setup_match(region)
    tiling.tile_size = (tile_size, )
    tiling.unroll_loops = unroll
    assert tiling.can_be_applied(sdfg, region)
    # change schedule so that OMP never fails
    tiling.schedule = dace.dtypes.ScheduleType.Sequential
    tiling.apply(sdfg)

    sdfg.name = f'tiled_{tile_size}_{offset}_{unroll}'
    execute(out_tiled)

    # Fuse the tiled result.
    sdfg.simplify()
    region = SubgraphView(state, list(state.nodes()))
    fusion = SubgraphFusion()
    fusion.setup_match(region)
    assert fusion.can_be_applied(sdfg, region)
    fusion.apply(sdfg)

    sdfg.name = f'fused_{tile_size}_{offset}_{unroll}'
    execute(out_fused)

    print(np.linalg.norm(out_baseline))
    print(np.linalg.norm(out_fused))
    print("PASS")
def apply(self, sdfg):
    """Fuse the matched subgraph, optionally expanding or tiling it first.

    Steps, in order:

    1. If ``allow_expansion`` is set, apply ``MultiExpansion`` when it
       reports that it can be applied.
    2. If ``SubgraphFusion`` can be applied to the subgraph, apply it and
       return.
    3. Otherwise, if ``allow_tiling`` is set and ``StencilTiling`` can be
       applied, tile first, rebuild the subgraph from the tiling's outer map
       entries, and apply ``SubgraphFusion`` to that.

    On success the fused map entry is stored in ``self._global_map_entry``.
    If neither fusion path ran, a warning is emitted through
    ``warnings.warn``.

    Args:
        sdfg: The SDFG that the transformations are applied to.
    """
    subgraph = self.subgraph_view(sdfg)
    graph = subgraph.graph
    outer_maps = helpers.get_outermost_scope_maps(sdfg, graph, subgraph, graph.scope_dict())
    # The value is not used below; the lookup is kept so that an empty
    # collection of outermost maps still raises at this point.
    next(iter(outer_maps))

    def configure_and_fuse(fusion):
        # Copy the fusion-related properties onto `fusion`, apply it, and
        # record the resulting global map entry on this transformation.
        fusion.debug = self.debug
        fusion.transient_allocation = self.transient_allocation
        fusion.schedule_innermaps = self.schedule_innermaps
        fusion.apply(sdfg)
        self._global_map_entry = fusion._global_map_entry

    if self.allow_expansion:
        expander = MultiExpansion()
        expander.setup_match(subgraph, self.sdfg_id, self.state_id)
        expander.permutation_only = not self.expansion_split
        if expander.can_be_applied(sdfg, subgraph):
            expander.apply(sdfg)

    # Direct fusion attempt, checked against a freshly built subgraph view.
    direct_fusion = SubgraphFusion()
    direct_fusion.setup_match(subgraph, self.sdfg_id, self.state_id)
    if direct_fusion.can_be_applied(sdfg, self.subgraph_view(sdfg)):
        configure_and_fuse(direct_fusion)
        return

    # Fallback: tile the stencil first, then fuse the tiled maps.
    if self.allow_tiling == True:
        tiling = StencilTiling()
        tiling.setup_match(subgraph, self.sdfg_id, self.state_id)
        if tiling.can_be_applied(sdfg, self.subgraph_view(sdfg)):
            tiling.debug = self.debug
            tiling.unroll_loops = self.stencil_unroll_loops
            tiling.strides = self.stencil_strides
            tiling.apply(sdfg)

            # The subgraph to fuse is rebuilt from the outer map entries
            # recorded by the tiling transformation.
            tiled_subgraph = helpers.subgraph_from_maps(sdfg, graph, tiling._outer_entries)
            tiled_fusion = SubgraphFusion()
            tiled_fusion.setup_match(tiled_subgraph, self.sdfg_id, self.state_id)
            configure_and_fuse(tiled_fusion)
            return

    warnings.warn("CompositeFusion::Apply did not perform as expected")