def test_subgraph():
    """Check results after MapFission and again after fusing the maps back.

    The program returned by ``mapfission_sdfg()`` is fissioned, executed and
    compared against the expected output from ``config()``. The first state
    is then fused with ``fusion`` and the comparison is repeated.
    """
    A, expected = config()
    B_init = np.random.rand(2)

    sdfg = mapfission_sdfg()
    sdfg.apply_transformations(MapFission)
    dace.sdfg.propagation.propagate_memlets_sdfg(sdfg)

    # Run the fissioned program on a private copy of the initial output.
    fissioned = sdfg.compile()
    B = dcpy(B_init)
    fissioned(A=A, B=B)
    del fissioned
    assert np.allclose(B, expected)
    sdfg.validate()

    # The whole first state must be accepted by SubgraphFusion.
    state = sdfg.nodes()[0]
    whole_state = SubgraphView(state, state.nodes())
    fusion_check = SubgraphFusion(whole_state)
    assert fusion_check.can_be_applied(sdfg, whole_state)
    fusion(sdfg, state, None)

    # Same comparison on the fused program, starting from a fresh copy.
    fused = sdfg.compile()
    B = dcpy(B_init)
    fused(A=A, B=B)
    assert np.allclose(B, expected)
    sdfg.validate()
def test_quantitatively(sdfg):
    """Compare the outputs of ``sdfg`` before and after expansion + fusion.

    The SDFG is run once as given (outputs ``C1``/``D1``), then
    MultiExpansion and SubgraphFusion are applied to all nodes of its first
    state and it is run again (outputs ``C2``/``D2``). Both output pairs
    must be numerically close.
    """
    graph = sdfg.nodes()[0]
    length = N.get()

    def random_vector():
        return np.random.rand(length).astype(np.float64)

    # Draw order is kept: A, B, C1, C2, D1, D2.
    A = random_vector()
    B = random_vector()
    C1 = random_vector()
    C2 = random_vector()
    D1 = random_vector()
    D2 = random_vector()

    # Reference run on the untransformed SDFG.
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C1, D=D1, N=N)

    whole_state = SubgraphView(graph, list(graph.nodes()))
    expander = MultiExpansion()
    fuser = SubgraphFusion()

    assert expander.match(sdfg, whole_state) == True
    expander.apply(sdfg, whole_state)

    assert fuser.match(sdfg, whole_state) == True
    fuser.apply(sdfg, whole_state)

    # Run again after the transformations and compare.
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C2, D=D2, N=N)

    assert np.allclose(C1, C2)
    assert np.allclose(D1, D2)
def fusion(sdfg: dace.SDFG,
           graph: dace.SDFGState,
           subgraph: Union[SubgraphView, List[SubgraphView]] = None,
           **kwargs):
    """Fuse the outermost maps of one or several subgraphs of ``graph``.

    :param sdfg: SDFG that contains ``graph``.
    :param graph: State on which the fusion is performed.
    :param subgraph: A single subgraph or a list of subgraphs. When falsy
                     (e.g. ``None``), ``graph`` itself is used.
    :param kwargs: Attributes set on the ``SubgraphFusion`` instance before
                   fusing, given as ``name=value`` pairs.
    """
    targets = subgraph if subgraph else graph
    if not isinstance(targets, list):
        targets = [targets]

    # One transformation object is shared by all targets.
    fuser = SubgraphFusion(targets[0])
    for attr_name, attr_value in kwargs.items():
        setattr(fuser, attr_name, attr_value)

    for target in targets:
        map_entries = helpers.get_outermost_scope_maps(sdfg, graph, target)
        is_view = isinstance(target, SubgraphView)

        # Remove the map entries and their corresponding exits from the
        # subgraph before the transformation is applied.
        if is_view:
            for entry in map_entries:
                target.nodes().remove(entry)
                exit_node = graph.exit_node(entry)
                if exit_node in target.nodes():
                    target.nodes().remove(exit_node)

        print(f"Subgraph Fusion on map entries {map_entries}")
        fuser.fuse(sdfg, graph, map_entries)

        # Register the fused map entry with the subgraph view.
        if is_view:
            target.nodes().append(fuser._global_map_entry)
def _test_quantitatively(sdfg):
    """Check that expansion followed by fusion leaves the outputs unchanged.

    ``sdfg`` is executed before and after MultiExpansion and SubgraphFusion
    are applied to all nodes of its first state. Outputs ``C`` and ``D`` of
    both runs are compared with ``np.allclose``.
    """
    graph = sdfg.nodes()[0]
    length = N.get()

    def random_vector():
        return np.random.rand(length).astype(np.float64)

    # Draw order is kept: A, B, C1, C2, D1, D2.
    A = random_vector()
    B = random_vector()
    C1 = random_vector()
    C2 = random_vector()
    D1 = random_vector()
    D2 = random_vector()

    # Reference run.
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C1, D=D1, N=N)
    del compiled

    whole_state = SubgraphView(graph, list(graph.nodes()))

    expander = MultiExpansion(whole_state)
    assert expander.can_be_applied(sdfg, whole_state) == True
    expander.apply(sdfg)

    fuser = SubgraphFusion(whole_state)
    assert fuser.can_be_applied(sdfg, whole_state) == True
    fuser.apply(sdfg)

    # Transformed run.
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C2, D=D2, N=N)

    assert np.allclose(C1, C2)
    assert np.allclose(D1, D2)
def invoke_stencil(tile_size, offset=False, unroll=False, view=False):
    """Run the stencil program untiled, tiled and fused, comparing outputs.

    :param tile_size: Tile size handed to StencilTiling as a 1-tuple.
    :param offset: Use ``stencil_offset`` instead of ``stencil``.
    :param unroll: Set ``unroll_loops`` on the StencilTiling instance.
    :param view: Call ``sdfg.view()`` before the baseline run and after
                 tiling.
    """
    length = N.get()
    A = np.random.rand(length).astype(np.float64)
    B1 = np.zeros(length, dtype=np.float64)
    B2 = np.zeros(length, dtype=np.float64)
    B3 = np.zeros(length, dtype=np.float64)

    sdfg = (stencil_offset if offset else stencil).to_sdfg()
    sdfg.apply_strict_transformations()
    graph = sdfg.nodes()[0]

    def run_into(output):
        # Compile the current SDFG and execute it once, writing to `output`.
        program = sdfg.compile()
        program(A=A, B=output, N=N)
        del program

    if view:
        sdfg.view()

    # baseline
    sdfg._name = 'baseline'
    sdfg.save('baseline.sdfg')
    run_into(B1)

    # tiling
    whole_state = SubgraphView(graph, list(graph.nodes()))
    tiling = StencilTiling(whole_state)
    tiling.tile_size = (tile_size, )
    tiling.schedule = dace.dtypes.ScheduleType.Sequential
    assert tiling.can_be_applied(sdfg, whole_state)
    if unroll:
        tiling.unroll_loops = True
    tiling.apply(sdfg)

    if view:
        sdfg.view()

    sdfg._name = 'tiled'
    sdfg.validate()
    sdfg.save('tiled.sdfg')
    run_into(B2)
    assert np.allclose(B1, B2)

    # fusion, also testing consolidation
    sdfg.apply_strict_transformations()
    whole_state = SubgraphView(graph, list(graph.nodes()))
    fuser = SubgraphFusion(whole_state)
    fuser.consolidate = True
    fuser.apply(sdfg)

    sdfg._name = 'fused'
    sdfg.save('fused.sdfg')
    run_into(B3)

    print(np.linalg.norm(B1))
    print(np.linalg.norm(B3))
    assert np.allclose(B1, B2)
    assert np.allclose(B1, B3)
    print("PASS")
def test_p3():
    """SubgraphFusion must not be applicable to ``disjoint_test_3``."""
    sdfg = disjoint_test_3.to_sdfg()
    sdfg.simplify()

    only_state = sdfg.nodes()[0]
    assert len(sdfg.nodes()) == 1

    whole_state = SubgraphView(only_state, only_state.nodes())
    fuser = SubgraphFusion(whole_state)
    applicable = fuser.can_be_applied(sdfg, whole_state)
    assert not applicable
def test_p2():
    """SubgraphFusion must not be applicable to ``disjoint_test_2``."""
    sdfg = disjoint_test_2.to_sdfg()
    sdfg.apply_strict_transformations()

    only_state = sdfg.nodes()[0]
    assert len(sdfg.nodes()) == 1

    whole_state = SubgraphView(only_state, only_state.nodes())
    fuser = SubgraphFusion(whole_state)
    applicable = fuser.can_be_applied(sdfg, whole_state)
    assert not applicable
def test_inputs_outputs():
    """
    Test subgraphs where the computation modules that are in the middle
    connect to the outside.

    The SDFG is built by hand, fissioned with MapFission, run and checked,
    then fused again with ``fusion`` and checked once more.
    """
    sdfg = dace.SDFG('inputs_outputs_fission')
    for name in ('in1', 'in2'):
        sdfg.add_array(name, [2], dace.float64)
    sdfg.add_scalar('tmp', dace.float64, transient=True)
    for name in ('out1', 'out2'):
        sdfg.add_array(name, [2], dace.float64)

    state = sdfg.add_state()
    in1 = state.add_read('in1')
    in2 = state.add_read('in2')
    out1 = state.add_write('out1')
    out2 = state.add_write('out2')
    map_entry, map_exit = state.add_map('outer', dict(i='0:2'))
    t1 = state.add_tasklet('t1', {'i1'}, {'o1', 'o2'},
                           'o1 = i1 * 2; o2 = i1 * 5')
    t2 = state.add_tasklet('t2', {'i1', 'i2'}, {'o1'}, 'o1 = i1 * i2')

    # Inputs enter through the map; t1 feeds t2 via the scalar `tmp`.
    state.add_memlet_path(in1,
                          map_entry,
                          t1,
                          dst_conn='i1',
                          memlet=dace.Memlet.simple('in1', 'i'))
    state.add_memlet_path(in2,
                          map_entry,
                          t2,
                          dst_conn='i2',
                          memlet=dace.Memlet.simple('in2', 'i'))
    state.add_edge(t1, 'o1', t2, 'i1', dace.Memlet.simple('tmp', '0'))
    # Both tasklets write to the outside through the map exit.
    state.add_memlet_path(t2,
                          map_exit,
                          out1,
                          src_conn='o1',
                          memlet=dace.Memlet.simple('out1', 'i'))
    state.add_memlet_path(t1,
                          map_exit,
                          out2,
                          src_conn='o2',
                          memlet=dace.Memlet.simple('out2', 'i'))

    sdfg.apply_transformations(MapFission)
    dace.sdfg.propagation.propagate_memlets_sdfg(sdfg)

    # Test
    A, B, C, D = tuple(np.random.rand(2) for _ in range(4))
    expected_C = (A * 2) * B
    expected_D = A * 5

    def run_and_check():
        # Execute the current SDFG on fresh output copies and verify them.
        out1_buf = deepcopy(C)
        out2_buf = deepcopy(D)
        program = sdfg.compile()
        program(in1=A, in2=B, out1=out1_buf, out2=out2_buf)
        del program
        assert np.allclose(out1_buf, expected_C)
        assert np.allclose(out2_buf, expected_D)

    run_and_check()

    first_state = sdfg.nodes()[0]
    whole_state = SubgraphView(first_state, first_state.nodes())
    fuser = SubgraphFusion(whole_state)
    assert fuser.can_be_applied(sdfg, whole_state)
    fusion(sdfg, first_state, None)

    run_and_check()
def test_offsets_array():
    """Fission and re-fuse a map over ``10:20`` with an array intermediate.

    Two tasklets (``+1`` then ``+2``) are chained through the one-element
    transient ``interim``, so ``A[10:]`` is expected to grow by 3. The
    result is checked after MapFission and again after ``fusion``.
    """
    sdfg = dace.SDFG('mapfission_offsets2')
    sdfg.add_array('A', [20], dace.float64)
    sdfg.add_array('interim', [1], dace.float64, transient=True)

    state = sdfg.add_state()
    map_entry, map_exit = state.add_map('outer', dict(i='10:20'))
    add_one = state.add_tasklet('addone', {'a'}, {'b'}, 'b = a + 1')
    interim = state.add_access('interim')
    add_two = state.add_tasklet('addtwo', {'a'}, {'b'}, 'b = a + 2')
    a_in = state.add_read('A')
    a_out = state.add_write('A')

    state.add_memlet_path(a_in,
                          map_entry,
                          add_one,
                          dst_conn='a',
                          memlet=dace.Memlet.simple('A', 'i'))
    state.add_edge(add_one, 'b', interim, None,
                   dace.Memlet.simple('interim', '0'))
    state.add_edge(interim, None, add_two, 'a',
                   dace.Memlet.simple('interim', '0'))
    state.add_memlet_path(add_two,
                          map_exit,
                          a_out,
                          src_conn='b',
                          memlet=dace.Memlet.simple('A', 'i'))

    sdfg.apply_transformations(MapFission)
    dace.propagate_memlets_sdfg(sdfg)
    sdfg.validate()

    # Test
    A = np.random.rand(20)
    expected = A.copy()
    expected[10:] += 3

    A_cpy = A.copy()
    csdfg = sdfg.compile()
    csdfg(A=A_cpy)
    del csdfg
    print(np.linalg.norm(A_cpy))
    print(np.linalg.norm(expected))
    assert np.allclose(A_cpy, expected)

    first_state = sdfg.nodes()[0]
    whole_state = SubgraphView(first_state, first_state.nodes())
    fuser = SubgraphFusion(whole_state)
    assert fuser.can_be_applied(sdfg, whole_state)
    fusion(sdfg, first_state, None)

    # Same check on the fused SDFG.
    A_cpy = A.copy()
    csdfg = sdfg.compile()
    csdfg(A=A_cpy)
    assert np.allclose(A_cpy, expected)
def invoke_stencil(tile_size, offset=False, unroll=False):
    """Run the stencil program untiled, tiled and fused, comparing outputs.

    The three stages write to ``B1``, ``B2`` and ``B3`` respectively; the
    tiled and fused results must match the baseline.

    :param tile_size: Tile size handed to StencilTiling as a 1-tuple.
    :param offset: Use ``stencil_offset`` instead of ``stencil``.
    :param unroll: Value assigned to ``unroll_loops`` on StencilTiling.
    """
    A = np.random.rand(N.get() * 2).astype(np.float64)
    B1 = np.zeros((N.get()), dtype=np.float64)
    B2 = np.zeros((N.get()), dtype=np.float64)
    B3 = np.zeros((N.get()), dtype=np.float64)

    if offset:
        sdfg = stencil_offset.to_sdfg()
    else:
        sdfg = stencil.to_sdfg()
    sdfg.simplify()
    graph = sdfg.nodes()[0]

    # baseline
    sdfg.name = f'baseline_{tile_size}_{offset}_{unroll}'
    csdfg = sdfg.compile()
    csdfg(A=A, B=B1, N=N)
    del csdfg

    subgraph = SubgraphView(graph, [n for n in graph.nodes()])
    st = StencilTiling(subgraph)
    st.tile_size = (tile_size, )
    st.unroll_loops = unroll
    assert st.can_be_applied(sdfg, subgraph)
    # change schedule so that OMP never fails
    st.schedule = dace.dtypes.ScheduleType.Sequential
    st.apply(sdfg)

    sdfg.name = f'tiled_{tile_size}_{offset}_{unroll}'
    csdfg = sdfg.compile()
    csdfg(A=A, B=B2, N=N)
    del csdfg

    sdfg.simplify()
    subgraph = SubgraphView(graph, [n for n in graph.nodes()])
    sf = SubgraphFusion(subgraph)
    assert sf.can_be_applied(sdfg, subgraph)
    sf.apply(sdfg)

    sdfg.name = f'fused_{tile_size}_{offset}_{unroll}'
    csdfg = sdfg.compile()
    csdfg(A=A, B=B3, N=N)
    del csdfg

    print(np.linalg.norm(B1))
    print(np.linalg.norm(B3))
    # Without these checks the function reported "PASS" regardless of what
    # the tiled or fused programs actually computed.
    assert np.allclose(B1, B2)
    assert np.allclose(B1, B3)
    print("PASS")
def test_quantitatively(sdfg, graph):
    """Compare the three outputs of ``sdfg`` before and after fusion.

    The SDFG is run as given, then ``expand_reduce``, ``expand_maps`` and
    ``fusion`` are applied to ``graph`` and it is run again. ``out1``,
    ``out2`` and ``out3`` of both runs must be numerically close.
    """
    n, m, o = N.get(), M.get(), O.get()

    A = np.random.rand(n).astype(np.float64)
    B = np.random.rand(m).astype(np.float64)
    C = np.random.rand(o).astype(np.float64)

    def blank(*shape):
        # Uninitialised float64 buffer; the program is expected to fill it.
        return np.ndarray(shape, np.float64)

    out1_base = blank(n, m)
    out2_base = blank(1)
    out3_base = blank(n, m, o)
    out1 = blank(n, m)
    out2 = blank(1)
    out3 = blank(n, m, o)

    inputs = dict(A=A, B=B, C=C)
    symbols = dict(N=N, M=M, O=O)

    # Reference run.
    compiled = sdfg.compile()
    compiled(out1=out1_base, out2=out2_base, out3=out3_base, **inputs,
             **symbols)

    expand_reduce(sdfg, graph)
    expand_maps(sdfg, graph)

    whole_state = SubgraphView(graph, list(graph.nodes()))
    assert SubgraphFusion.match(sdfg, whole_state) == True
    fusion(sdfg, graph)
    sdfg.validate()

    # Transformed run.
    compiled = sdfg.compile()
    compiled(out1=out1, out2=out2, out3=out3, **inputs, **symbols)

    assert np.allclose(out1, out1_base)
    assert np.allclose(out2, out2_base)
    assert np.allclose(out3, out3_base)
    print('PASS')
def test_p1():
    """Run ``program`` before and after MultiExpansion + SubgraphFusion.

    Only applicability and successful compilation/execution are checked;
    the results of the two runs are not compared.
    """
    for symbol, value in ((N, 20), (M, 30), (O, 50), (P, 40), (Q, 42),
                          (R, 25)):
        symbol.set(value)

    sdfg = program.to_sdfg()
    sdfg.apply_strict_transformations()
    state = sdfg.nodes()[0]

    # Array name -> symbol giving its length, in the original draw order.
    lengths = (('A', N), ('B', M), ('C', O), ('D', M), ('E', N), ('F', P),
               ('G', M), ('H', P), ('I', N), ('J', R), ('X', N), ('Y', M),
               ('Z', P))
    arrays = {
        name: np.random.rand(symbol.get()).astype(np.float64)
        for name, symbol in lengths
    }
    symbols = dict(N=N, M=M, O=O, P=P, R=R, Q=Q)

    csdfg = sdfg.compile()
    csdfg(**arrays, **symbols)
    del csdfg

    whole_state = SubgraphView(state, list(state.nodes()))
    expander = MultiExpansion(whole_state)
    fuser = SubgraphFusion(whole_state)

    assert MultiExpansion.can_be_applied(sdfg, whole_state)
    expander.apply(sdfg)

    assert SubgraphFusion.can_be_applied(sdfg, whole_state)
    fuser.apply(sdfg)

    csdfg = sdfg.compile()
    csdfg(**arrays, **symbols)
    print("PASS")
def test_warp_softmax(vector_length=1):
    """Transform ``softmax_fwd`` step by step and compare with ``softmax``.

    :param vector_length: When different from 1, Vectorization is applied
                          repeatedly with this vector length.
    """
    # Get SDFG
    sdfg = softmax_fwd.to_sdfg(strict=True)

    # Apply transformations. The node list is re-read for each apply_to
    # call because it is evaluated after the previous transformation ran.
    sdfg.apply_transformations_repeated(ReduceExpansion)
    MultiExpansion.apply_to(sdfg, sdfg.node(0).nodes())
    SubgraphFusion.apply_to(sdfg, sdfg.node(0).nodes())
    sdfg.expand_library_nodes()
    sdfg.apply_strict_transformations()
    sdfg.apply_transformations_repeated([TrivialMapElimination, MapFusion])
    sdfg.apply_transformations(GPUTransformSDFG)
    sdfg.apply_transformations(WarpTiling)
    sdfg.apply_transformations_repeated(
        [HoistState, InlineSDFG, StateFusion], strict=True)
    sdfg.apply_transformations_repeated([TrivialMapElimination, MapFusion])

    if vector_length != 1:
        vectorization_options = {
            'vector_len': vector_length,
            'preamble': False,
            'postamble': False,
            'strided_map': False,
        }
        sdfg.apply_transformations_repeated(Vectorization,
                                            vectorization_options)

    sdfg.specialize({'dn1': 2, 'dn2': 16, 'dn3': 128, 'dr': 128})

    # Check validity: one state holding exactly one top-level map.
    sdfg.validate()
    assert sdfg.number_of_nodes() == 1
    state = sdfg.node(0)
    top_level_maps = [
        node for node in state.scope_children()[None]
        if isinstance(node, dace.nodes.MapEntry)
    ]
    assert len(top_level_maps) == 1

    # Check correctness
    shape = (2, 16, 128, 128)
    inp = np.random.rand(*shape).astype(np.float32)
    out = np.random.rand(*shape).astype(np.float32)
    reg_out = softmax(inp)
    sdfg(inp=inp, out=out)
    assert np.allclose(out, reg_out, rtol=1e-4, atol=1e-6)
def can_be_applied(self, sdfg: SDFG, subgraph: SubgraphView) -> bool:
    """Return whether fusion, optionally helped by expansion/tiling, applies.

    With ``allow_expansion`` set, SubgraphFusion is first tried on the
    subgraph as is. If that fails and MultiExpansion is applicable, the
    expansion is applied to a deep copy of ``sdfg`` and SubgraphFusion (and,
    if ``allow_tiling`` is set, StencilTiling) is checked on the copy.
    Without ``allow_expansion``, SubgraphFusion and optionally StencilTiling
    are checked directly on the given subgraph.

    :param sdfg: SDFG containing the subgraph.
    :param subgraph: Subgraph to be checked.
    :return: True if one of the checks above succeeds, False otherwise.
    """
    graph = subgraph.graph

    if self.allow_expansion == True:
        # Try without a copy first.
        if SubgraphFusion(subgraph).can_be_applied(sdfg, subgraph):
            return True

        expansion = MultiExpansion(subgraph)
        expansion.permutation_only = not self.expansion_split
        if not expansion.can_be_applied(sdfg, subgraph):
            return False

        # Remember the positions of the subgraph nodes so that the same
        # nodes can be picked out of the copied state.
        node_positions = [
            pos for pos, node in enumerate(graph.nodes()) if node in subgraph
        ]
        sdfg_copy = copy.deepcopy(sdfg)
        state_copy = sdfg_copy.nodes()[sdfg.nodes().index(graph)]
        view_copy = SubgraphView(
            state_copy, [state_copy.nodes()[pos] for pos in node_positions])

        # Apply the expansion on the copy only.
        expansion.sdfg_id = sdfg_copy.sdfg_id
        expansion.apply(sdfg_copy)

        if SubgraphFusion(view_copy).can_be_applied(sdfg_copy, view_copy):
            return True

        tiling = StencilTiling(view_copy)
        if self.allow_tiling and tiling.can_be_applied(sdfg_copy, view_copy):
            return True
        return False

    if SubgraphFusion(subgraph).can_be_applied(sdfg, subgraph):
        return True

    if self.allow_tiling == True:
        if StencilTiling(subgraph).can_be_applied(sdfg, subgraph):
            return True

    return False
def _test_quantitatively(sdfg, graph):
    """Check that fusing ``graph`` does not change output ``C`` of ``sdfg``.

    The SDFG is run before and after ``fusion(sdfg, graph)`` with the same
    random inputs; both ``C`` outputs must be numerically close.
    """
    shape = (N.get(), M.get(), O.get())
    A = np.random.rand(*shape).astype(np.float64)
    B = np.random.rand(*shape).astype(np.float64)
    C1 = np.zeros(shape, dtype=np.float64)
    C2 = np.zeros(shape, dtype=np.float64)

    # Reference run.
    sdfg.validate()
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C1, N=N, M=M, O=O)
    del compiled

    whole_state = SubgraphView(graph, graph.nodes())
    fuser = SubgraphFusion(whole_state)
    assert fuser.can_be_applied(sdfg, whole_state)
    fusion(sdfg, graph)

    # Fused run.
    compiled = sdfg.compile()
    compiled(A=A, B=B, C=C2, N=N, M=M, O=O)
    del compiled

    assert np.allclose(C1, C2)
    print('PASS')
def test_p1():
    """SubgraphFusion on ``disjoint_test_1`` must leave the result unchanged.

    Two copies of the same random array are processed in place, one before
    and one after the fusion, and then compared.
    """
    sdfg = disjoint_test_1.to_sdfg()
    sdfg.simplify()

    only_state = sdfg.nodes()[0]
    assert len(sdfg.nodes()) == 1

    A = np.random.rand(M.get(), 2).astype(np.float64)
    A1 = A.copy()
    A2 = A.copy()

    # Reference run.
    compiled = sdfg.compile()
    compiled(A=A1, N=N, M=M)
    del compiled

    whole_state = SubgraphView(only_state, only_state.nodes())
    fuser = SubgraphFusion(whole_state)
    assert fuser.can_be_applied(sdfg, whole_state)
    fuser.apply(sdfg)

    # Fused run; called with M only, as in the original test.
    compiled = sdfg.compile()
    compiled(A=A2, M=M)
    del compiled

    assert np.allclose(A1, A2)
def test_applyto_subgraph():
    """Apply SubgraphFusion via ``apply_to`` on all nodes of the first state."""
    sdfg = dbladd.to_sdfg()
    sdfg.apply_strict_transformations()

    # Apply to subgraph
    first_state = sdfg.node(0)
    SubgraphFusion.apply_to(sdfg, first_state.nodes())
def test_applyto_subgraph():
    """Apply SubgraphFusion via ``apply_to`` on all nodes of the first state."""
    sdfg = dbladd.to_sdfg()
    sdfg.simplify()

    # Apply to subgraph
    first_state = sdfg.node(0)
    SubgraphFusion.apply_to(sdfg, first_state.nodes())
def test_applyto_subgraph():
    """Apply SubgraphFusion via ``apply_to`` on all nodes of the first state."""
    sdfg = dbladd.to_sdfg()
    sdfg.coarsen_dataflow()

    # Apply to subgraph
    first_state = sdfg.node(0)
    SubgraphFusion.apply_to(sdfg, first_state.nodes())
def apply(self, sdfg):
    """Expand (optionally), then fuse the subgraph, tiling first if needed.

    If ``allow_expansion`` is set and MultiExpansion is applicable, it is
    applied. SubgraphFusion is then applied if possible. Otherwise, if
    ``allow_tiling`` is set and StencilTiling is applicable, tiling is
    applied and SubgraphFusion is run on the subgraph spanned by the new
    outer map entries. On success ``self._global_map_entry`` is set from
    the SubgraphFusion instance; if nothing could be applied a warning is
    issued.

    :param sdfg: SDFG to transform.
    """
    subgraph = self.subgraph_view(sdfg)
    graph = subgraph.graph
    scope_dict = graph.scope_dict()
    map_entries = helpers.get_outermost_scope_maps(sdfg, graph, subgraph,
                                                   scope_dict)
    # Not used below; kept as in the original, where it raises
    # StopIteration if there are no outermost map entries.
    first_entry = next(iter(map_entries))

    def run_fusion(fuser):
        # Forward this transformation's settings to SubgraphFusion, apply
        # it and remember the resulting global map entry.
        fuser.debug = self.debug
        fuser.transient_allocation = self.transient_allocation
        fuser.schedule_innermaps = self.schedule_innermaps
        fuser.apply(sdfg)
        self._global_map_entry = fuser._global_map_entry

    if self.allow_expansion:
        expander = MultiExpansion(subgraph, self.sdfg_id, self.state_id)
        expander.permutation_only = not self.expansion_split
        if expander.can_be_applied(sdfg, subgraph):
            expander.apply(sdfg)

    fuser = SubgraphFusion(subgraph, self.sdfg_id, self.state_id)
    if fuser.can_be_applied(sdfg, self.subgraph_view(sdfg)):
        run_fusion(fuser)
        return

    if self.allow_tiling == True:
        tiler = StencilTiling(subgraph, self.sdfg_id, self.state_id)
        if tiler.can_be_applied(sdfg, self.subgraph_view(sdfg)):
            # set StencilTiling properties
            tiler.debug = self.debug
            tiler.unroll_loops = self.stencil_unroll_loops
            tiler.strides = self.stencil_strides
            tiler.apply(sdfg)

            # StencilTiling: continue with the newly created outer maps
            tiled_subgraph = helpers.subgraph_from_maps(
                sdfg, graph, tiler._outer_entries)
            run_fusion(
                SubgraphFusion(tiled_subgraph, self.sdfg_id, self.state_id))
            return

    warnings.warn("CompositeFusion::Apply did not perform as expected")
def can_be_applied(sdfg, subgraph) -> bool:
    """Return whether the outermost maps of ``subgraph`` can be tiled.

    The checks are numbered 1.1 to 1.7 in the comments below; the function
    returns False as soon as one of them fails.

    :param sdfg: SDFG containing the subgraph.
    :param subgraph: Subgraph whose outermost-scope maps are inspected;
                     its ``graph`` attribute is used as the state.
    :return: True if all checks pass, False otherwise.
    """
    # get highest scope maps
    graph = subgraph.graph
    map_entries = set(
        helpers.get_outermost_scope_maps(sdfg, graph, subgraph))

    # 1.1: There has to be more than one outermost scope map entry
    if len(map_entries) <= 1:
        return False

    # 1.2: check basic constraints:
    # - all parameters have to be the same (this implies same length)
    # - no parameter permutations here as ambiguity is very high then
    # - same strides everywhere
    first_map = next(iter(map_entries))
    params = dcpy(first_map.map.params)
    strides = first_map.map.range.strides()
    schedule = first_map.map.schedule

    for map_entry in map_entries:
        if map_entry.map.params != params:
            return False
        if map_entry.map.range.strides() != strides:
            return False
        if map_entry.map.schedule != schedule:
            return False

    # 1.3: check whether all map entries only differ by a const amount
    first_entry = next(iter(map_entries))
    for map_entry in map_entries:
        for r1, r2 in zip(map_entry.map.range, first_entry.map.range):
            if len((r1[0] - r2[0]).free_symbols) > 0:
                return False
            if len((r1[1] - r2[1]).free_symbols) > 0:
                return False

    # get intermediate_nodes, out_nodes from SubgraphFusion Transformation
    node_config = SubgraphFusion.get_adjacent_nodes(sdfg, graph, map_entries)
    (_, intermediate_nodes, out_nodes) = node_config

    # 1.4: check topological feasibility
    if not SubgraphFusion.check_topo_feasibility(
            sdfg, graph, map_entries, intermediate_nodes, out_nodes):
        return False

    # 1.5 nodes that are both intermediate and out nodes
    # are not supported in StencilTiling
    if len(intermediate_nodes & out_nodes) > 0:
        return False

    # get coverages for every map entry
    coverages = {}
    memlets = {}
    for map_entry in map_entries:
        coverages[map_entry] = StencilTiling.coverage_dicts(
            sdfg, graph, map_entry)
        memlets[map_entry] = StencilTiling.coverage_dicts(
            sdfg, graph, map_entry, outer_range=False)

    # get DAG neighbours for each map
    dag_neighbors = StencilTiling.topology(sdfg, graph, map_entries)
    (children_dict, _, sink_maps) = dag_neighbors

    # 1.6: we now check coverage:
    # each outgoing coverage for a data memlet has to
    # be exactly equal to the union of incoming coverages
    # of all children map memlets of this data
    # important:
    # 1. it has to be equal and not only cover it in order to
    #    account for ranges too long
    # 2. we check coverages by map parameter and not by
    #    array, this way it is even more general
    # 3. map parameter coverages are checked for each
    #    (map_entry, children of this map_entry) - pair
    for map_entry in map_entries:
        # get coverage from current map_entry
        map_coverage = coverages[map_entry][1]

        # final mapping map_parameter -> coverage will be stored here
        param_parent_coverage = {p: None for p in map_entry.params}
        param_children_coverage = {p: None for p in map_entry.params}

        for child_entry in children_dict[map_entry]:
            # get mapping data_name -> coverage
            for (data_name, cov) in map_coverage.items():
                parent_coverage = cov

                # A child that does not consume this data contributes no
                # coverage. Previously this left the children coverage as
                # None and the zip() below raised a TypeError.
                if data_name not in coverages[child_entry][0]:
                    continue
                children_coverage = subsets.union(
                    None, coverages[child_entry][0][data_name])

                # extend mapping map_parameter -> coverage
                # by the previous mapping
                for i, (p_subset, c_subset) in enumerate(
                        zip(parent_coverage, children_coverage)):
                    # transform into subset
                    p_subset = subsets.Range((p_subset, ))
                    c_subset = subsets.Range((c_subset, ))

                    # get associated parameter in memlet
                    params1 = symbolic.symlist(
                        memlets[map_entry][1][data_name][i]).keys()
                    params2 = symbolic.symlist(
                        memlets[child_entry][0][data_name][i]).keys()
                    if params1 != params2:
                        return False
                    dim_params = params1
                    if len(dim_params) > 1:
                        # this is not supported
                        return False

                    try:
                        symbol = next(iter(dim_params))
                    except StopIteration:
                        # current dim has no symbol associated.
                        # ignore and continue
                        # (both fragments are f-strings so that
                        # data_name is actually interpolated)
                        warnings.warn(
                            f"In map {map_entry}, there is a "
                            f"dimension belonging to {data_name} "
                            "that has no map parameter associated.")
                    else:
                        param_parent_coverage[symbol] = subsets.union(
                            param_parent_coverage[symbol], p_subset)
                        param_children_coverage[symbol] = subsets.union(
                            param_children_coverage[symbol], c_subset)

        # 1.6: parameter mapping must be the same
        if param_parent_coverage != param_children_coverage:
            return False

    # 1.7: we want all sink maps to have the same range size
    assert len(sink_maps) > 0
    first_sink_map = next(iter(sink_maps))
    if not all(sink_map.range.size() == first_sink_map.range.size()
               for sink_map in sink_maps):
        return False

    return True
def test_p1():
    """Run ``subgraph_fusion_parallel`` before and after expansion + fusion.

    Only applicability and successful compilation/execution are checked;
    the results of the two runs are not compared.
    """
    N.set(20)
    M.set(30)
    O.set(50)
    P.set(40)
    Q.set(42)
    R.set(25)

    sdfg = subgraph_fusion_parallel.to_sdfg()
    sdfg.simplify()
    state = sdfg.nodes()[0]

    A = np.random.rand(N.get()).astype(np.float64)
    B = np.random.rand(M.get()).astype(np.float64)
    C = np.random.rand(O.get()).astype(np.float64)
    D = np.random.rand(M.get()).astype(np.float64)
    E = np.random.rand(N.get()).astype(np.float64)
    F = np.random.rand(P.get()).astype(np.float64)
    G = np.random.rand(M.get()).astype(np.float64)
    H = np.random.rand(P.get()).astype(np.float64)
    I = np.random.rand(N.get()).astype(np.float64)
    J = np.random.rand(R.get()).astype(np.float64)
    X = np.random.rand(N.get()).astype(np.float64)
    Y = np.random.rand(M.get()).astype(np.float64)
    Z = np.random.rand(P.get()).astype(np.float64)

    csdfg = sdfg.compile()
    csdfg(A=A, B=B, C=C, D=D, E=E, F=F, G=G, H=H, I=I, J=J, X=X, Y=Y, Z=Z,
          N=N, M=M, O=O, P=P, R=R, Q=Q)
    del csdfg

    subgraph = SubgraphView(state, list(state.nodes()))

    # The original also built a second, never-used MultiExpansion and
    # SubgraphFusion instance here; those dead duplicates were removed.
    me = MultiExpansion()
    me.setup_match(subgraph)
    assert me.can_be_applied(sdfg, subgraph)
    me.apply(sdfg)

    sf = SubgraphFusion()
    sf.setup_match(subgraph)
    assert sf.can_be_applied(sdfg, subgraph)
    sf.apply(sdfg)

    csdfg = sdfg.compile()
    csdfg(A=A, B=B, C=C, D=D, E=E, F=F, G=G, H=H, I=I, J=J, X=X, Y=Y, Z=Z,
          N=N, M=M, O=O, P=P, R=R, Q=Q)
    print("PASS")