def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView, state_id: int, function_stream: CodeIOStream,
                   callsite_stream: CodeIOStream):
    """Fully unrolls a map scope at code-generation time.

    Every combination of map-parameter values is emitted as its own C++
    block: the parameters are declared as ``auto`` constants and the scope
    subgraph is dispatched once per combination.

    :param sdfg: The SDFG being generated.
    :param scope: Subgraph view of the map scope to unroll.
    :param state_id: Index of the state containing the scope.
    :param function_stream: Stream for global (function-level) code.
    :param callsite_stream: Stream for code at the call site.
    """
    map_entry = scope.source_nodes()[0]

    # Expand each map dimension into the explicit list of values it
    # iterates over (inclusive of the end, stepping by the stride).
    unrolled_ranges = []
    for low, high, step in map_entry.map.range:
        values = []
        current = low
        while current <= high:
            values.append(current)
            current += step
        unrolled_ranges.append(values)

    # Emit one scoped block per point in the Cartesian product of all
    # dimension values, with the map parameters bound as local constants.
    for point in product(*unrolled_ranges):
        callsite_stream.write('{')
        for param, index in zip(map_entry.map.params, point):
            callsite_stream.write(f'auto {param} = {sym2cpp(index)};')
        self._dispatcher.dispatch_subgraph(sdfg,
                                           scope,
                                           state_id,
                                           function_stream,
                                           callsite_stream,
                                           skip_entry_node=True,
                                           skip_exit_node=True)
        callsite_stream.write('}')
def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView, state_id: int, function_stream: CodeIOStream,
                   callsite_stream: CodeIOStream):
    """Unrolls a map scope by emitting one C++ block per index combination,
    binding each map parameter as a ``constexpr`` and registering it as an
    SDFG constant so nested code can treat it as compile-time known.

    :param sdfg: The SDFG being generated (its constants are temporarily
                 modified and restored at the end).
    :param scope: Subgraph view of the map scope to unroll.
    :param state_id: Index of the state containing the scope.
    :param function_stream: Stream for global (function-level) code.
    :param callsite_stream: Stream for code at the call site.
    """
    entry_node: nd.MapEntry = scope.source_nodes()[0]

    # Expand each map dimension into the explicit list of values it
    # iterates over (inclusive of the end, stepping by the stride).
    index_list = []
    for begin, end, stride in entry_node.map.range:
        l = []
        while begin <= end:
            l.append(begin)
            begin += stride
        index_list.append(l)

    # Save the SDFG's constants and work on a deep copy: each unrolled
    # iteration registers the map parameters as constants (add_constant
    # below), and the original set is restored after the loop.
    sdfgconsts = sdfg.constants_prop
    sdfg.constants_prop = copy.deepcopy(sdfg.constants_prop)

    # Types of the symbols the map defines, used for the constexpr decls.
    mapsymboltypes = entry_node.new_symbols(sdfg, scope, [entry_node.map.params])
    for indices in product(*index_list):
        callsite_stream.write('{')
        nsdfg_unroll_info = None
        for param, index in zip(entry_node.map.params, indices):
            # Prepare nested SDFGs for this unrolled parameter value; only
            # the first call's info is kept, and it is used for the
            # post-iteration cleanup (nsdfg_after_unroll) below.
            if nsdfg_unroll_info is None:
                nsdfg_unroll_info = self.nsdfg_prepare_unroll(scope, str(param), str(index))
            else:
                self.nsdfg_prepare_unroll(scope, str(param), str(index))
            # Bind the parameter as a compile-time constant in the emitted
            # code, and mirror it as an SDFG constant for nested codegen.
            callsite_stream.write(
                f"constexpr {mapsymboltypes[param]} {param} = "
                f"{dace.codegen.targets.common.sym2cpp(index)};\n", sdfg)
            sdfg.add_constant(param, int(index))
        # Inner brace scopes the dispatched subgraph; together with the
        # outer brace this balances the two closing braces below.
        callsite_stream.write('{')
        self._dispatcher.dispatch_subgraph(
            sdfg,
            scope,
            state_id,
            function_stream,
            callsite_stream,
            skip_entry_node=True,
            skip_exit_node=True,
        )
        callsite_stream.write('}')
        callsite_stream.write('}')
        self.nsdfg_after_unroll(nsdfg_unroll_info)
    # Restore the constants that were in place before unrolling.
    # NOTE(review): not in a try/finally — an exception during dispatch
    # would leave the copied constants in place; confirm acceptable.
    sdfg.constants_prop = sdfgconsts
def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView, state_id: int, function_stream: CodeIOStream,
                   callsite_stream: CodeIOStream):
    """Generates an ARM SVE vectorized loop nest for a map scope.

    Outer map dimensions become plain C++ for-loops; the innermost
    dimension becomes an SVE predicated do-while loop driven by
    ``svwhilele``/``svptest_any``.

    Fix: the loop counter type was taken as ``list(set(...))[0]`` over the
    SDFG's array dtypes, which raised IndexError for SDFGs without arrays.
    It now falls back to ``dace.int64`` in that case (consistent with the
    newer single-dimensional SVE scope generator in this codebase).

    :param sdfg: The SDFG being generated.
    :param scope: Subgraph view of the map scope.
    :param state_id: Index of the state containing the scope.
    :param function_stream: Stream for global (function-level) code.
    :param callsite_stream: Stream for code at the call site.
    """
    entry_node = scope.source_nodes()[0]

    # Derive the loop counter type from the element types used by the
    # SDFG's arrays; default to int64 when no arrays exist.
    # NOTE(review): picking [0] of a set-derived list assumes a single
    # (or interchangeable) dtype across arrays — confirm upstream checks.
    loop_types = list(set([sdfg.arrays[a].dtype for a in sdfg.arrays]))
    loop_type = loop_types[0] if loop_types else dace.int64
    ltype_size = loop_type.bytes

    long_type = copy.copy(dace.int64)
    long_type.ctype = 'int64_t'

    self.counter_type = {1: dace.int8, 2: dace.int16, 4: dace.int32, 8: long_type}[ltype_size]

    callsite_stream.write('{')

    # Define all input connectors of the map entry
    state_dfg = sdfg.node(state_id)
    for e in dace.sdfg.dynamic_map_inputs(state_dfg, entry_node):
        if e.data.data != e.dst_conn:
            callsite_stream.write(
                self.cpu_codegen.memlet_definition(sdfg, e.data, False, e.dst_conn,
                                                   e.dst.in_connectors[e.dst_conn]), sdfg, state_id, entry_node)

    # We only create an SVE do-while in the innermost loop
    for param, rng in zip(entry_node.map.params, entry_node.map.range):
        begin, end, stride = (sym2cpp(r) for r in rng)

        self.dispatcher.defined_vars.enter_scope(sdfg)

        # Check whether we are in the innermost loop
        if param != entry_node.map.params[-1]:
            # Default C++ for-loop
            callsite_stream.write(f'for(auto {param} = {begin}; {param} <= {end}; {param} += {stride}) {{')
        else:
            # Generate the SVE loop header
            # The name of our loop predicate is always __pg_{param}
            self.dispatcher.defined_vars.add('__pg_' + param, DefinedType.Scalar, 'svbool_t')

            # Declare our counting variable (e.g. i) and precompute the loop predicate for our range
            callsite_stream.write(
                f'''{self.counter_type} {param} = {begin};
svbool_t __pg_{param} = svwhilele_b{ltype_size * 8}({param}, ({self.counter_type}) {end});
do {{''', sdfg, state_id, entry_node)

    # Dispatch the subgraph generation
    self.dispatcher.dispatch_subgraph(sdfg,
                                      scope,
                                      state_id,
                                      function_stream,
                                      callsite_stream,
                                      skip_entry_node=True,
                                      skip_exit_node=True)

    # Close the loops from above (in reverse)
    for param, rng in zip(reversed(entry_node.map.params), reversed(entry_node.map.range)):
        # The innermost loop is SVE and needs a special while-footer, otherwise we just add the closing bracket
        if param != entry_node.map.params[-1]:
            # Close the default C++ for-loop
            callsite_stream.write('}')
        else:
            # Generate the SVE loop footer
            _, end, stride = (sym2cpp(r) for r in rng)

            # Increase the counting variable (according to the number of processed elements)
            # Then recompute the loop predicate and test for it
            callsite_stream.write(
                f'''{param} += svcntp_b{ltype_size * 8}(__pg_{param}, __pg_{param}) * {stride};
__pg_{param} = svwhilele_b{ltype_size * 8}({param}, ({self.counter_type}) {end});
}} while(svptest_any(svptrue_b{ltype_size * 8}(), __pg_{param}));''', sdfg, state_id, entry_node)

        self.dispatcher.defined_vars.exit_scope(sdfg)

    callsite_stream.write('}')
def generate_scope(self, sdfg: dace.SDFG, scope: ScopeSubgraphView, state_id: int, function_stream: CodeIOStream,
                   callsite_stream: CodeIOStream):
    """Generates an ARM SVE predicated while-loop for a one-dimensional map.

    The single map parameter becomes a counter advanced by the hardware
    vector length (``svcnt*``), guarded by an SVE predicate recomputed with
    ``svwhilele`` on every iteration.

    :param sdfg: The SDFG being generated.
    :param scope: Subgraph view of the map scope.
    :param state_id: Index of the state containing the scope.
    :param function_stream: Stream for global (function-level) code.
    :param callsite_stream: Stream for code at the call site.
    :raises util.NotSupportedError: If the map has more than one dimension.
    """
    entry_node = scope.source_nodes()[0]
    current_map = entry_node.map
    # Remember the map being generated for use by other codegen callbacks.
    self.current_map = current_map

    if len(current_map.params) > 1:
        raise util.NotSupportedError('SVE map must be one dimensional')

    # Derive the loop counter type from the base element types of the
    # SDFG's arrays.
    # NOTE(review): picking [0] of a set-derived list assumes a single
    # (or interchangeable) base dtype across arrays — confirm upstream.
    loop_types = list(set([util.get_base_type(sdfg.arrays[a].dtype) for a in sdfg.arrays]))

    # Edge case if no arrays are used
    loop_type = loop_types[0] if len(loop_types) > 0 else dace.int64

    ltype_size = loop_type.bytes

    long_type = copy.copy(dace.int64)
    long_type.ctype = 'int64_t'

    self.counter_type = {1: dace.int8, 2: dace.int16, 4: dace.int32, 8: long_type}[ltype_size]

    callsite_stream.write('{')
    self.dispatcher.defined_vars.enter_scope(scope)

    # Define all dynamic input connectors of the map entry
    state_dfg = sdfg.node(state_id)
    for e in dace.sdfg.dynamic_map_inputs(state_dfg, entry_node):
        if e.data.data != e.dst_conn:
            callsite_stream.write(
                self.cpu_codegen.memlet_definition(sdfg, e.data, False, e.dst_conn,
                                                   e.dst.in_connectors[e.dst_conn]), sdfg, state_id, entry_node)

    param = current_map.params[0]
    rng = current_map.range[0]
    begin, end, stride = (sym2cpp(r) for r in rng)

    # Generate the SVE loop header
    # The name of our loop predicate is always __pg_{param}
    self.dispatcher.defined_vars.add('__pg_' + param, DefinedType.Scalar, 'svbool_t')

    # Declare our counting variable (e.g. i) and precompute the loop predicate for our range
    callsite_stream.write(f'{self.counter_type} {param} = {begin};')

    # Materialize the loop end in a variable so it is evaluated only once.
    end_param = f'__{param}_to'
    callsite_stream.write(f'{self.counter_type} {end_param} = {end};')

    callsite_stream.write(f'svbool_t __pg_{param} = svwhilele_b{ltype_size * 8}({param}, {end_param});')

    # Test for the predicate
    callsite_stream.write(f'while(svptest_any(svptrue_b{ltype_size * 8}(), __pg_{param})) {{')

    # Allocate scope related memory
    for node, _ in scope.all_nodes_recursive():
        if isinstance(node, nodes.Tasklet):
            # Create empty shared registers for outputs into other tasklets
            for edge in state_dfg.out_edges(node):
                if isinstance(edge.dst, dace.nodes.Tasklet):
                    self.generate_out_register(sdfg, state_dfg, edge, callsite_stream, True)

    # Dispatch the subgraph generation
    self.dispatcher.dispatch_subgraph(sdfg,
                                      scope,
                                      state_id,
                                      function_stream,
                                      callsite_stream,
                                      skip_entry_node=True,
                                      skip_exit_node=True)

    # Increase the counting variable (according to the number of processed elements)
    # svcnt{b,h,w,d} returns the number of lanes for the element size.
    size_letter = {1: 'b', 2: 'h', 4: 'w', 8: 'd'}[ltype_size]
    callsite_stream.write(f'{param} += svcnt{size_letter}() * {stride};')

    # Then recompute the loop predicate
    callsite_stream.write(f'__pg_{param} = svwhilele_b{ltype_size * 8}({param}, {end_param});')

    callsite_stream.write('}')

    self.dispatcher.defined_vars.exit_scope(scope)
    callsite_stream.write('}')