def allocate_array(self, sdfg: dace.SDFG, dfg: StateSubgraphView, state_id: int, node: nodes.AccessNode, function_stream: CodeIOStream, declaration_stream: CodeIOStream, allocation_stream: CodeIOStream): name = node.data nodedesc = node.desc(sdfg) # Based on the hardware, the total size must be 16^2 assert nodedesc.total_size == 16 * 16 # Majority is detected by the strides of the data maj = 'row' if nodedesc.strides[-1] == 1 else 'col' # Write a fragment based on the storage type if nodedesc.storage == dace.StorageType.TensorCore_Accumulator: declaration_stream.write( 'wmma::fragment<wmma::accumulator, ' '16, 16, 16, float> {};'.format(name), sdfg, state_id, node) else: declaration_stream.write( 'wmma::fragment<wmma::matrix_{mat}, ' '16, 16, 16, half, wmma::{maj}_major> ' '{name};'.format( mat=('a' if 'A' in nodedesc.storage.name else 'b'), maj=maj, name=name), sdfg, state_id, node)
def is_array_stream_view(sdfg: SDFG, dfg: SDFGState, node: nd.AccessNode): """ Test whether a stream is directly connected to an array. """ # Test all memlet paths from the array. If the path goes directly # to/from a stream, construct a stream array view all_source_paths = [] source_paths = [] all_sink_paths = [] sink_paths = [] for e in dfg.in_edges(node): src_node = dfg.memlet_path(e)[0].src # Append empty path to differentiate between a copy and an array-view if isinstance(src_node, nd.CodeNode): all_source_paths.append(None) # Append path from source node if isinstance(src_node, nd.AccessNode) and isinstance( src_node.desc(sdfg), dt.Array): source_paths.append(src_node) for e in dfg.out_edges(node): sink_node = dfg.memlet_path(e)[-1].dst # Append empty path to differentiate between a copy and an array-view if isinstance(sink_node, nd.CodeNode): all_sink_paths.append(None) # Append path to sink node if isinstance(sink_node, nd.AccessNode) and isinstance( sink_node.desc(sdfg), dt.Array): sink_paths.append(sink_node) all_sink_paths.extend(sink_paths) all_source_paths.extend(source_paths) # Special case: stream can be represented as a view of an array if ((len(all_source_paths) > 0 and len(sink_paths) == 1) or (len(all_sink_paths) > 0 and len(source_paths) == 1)): # TODO: What about a source path? arrnode = sink_paths[0] # Only works if the stream itself is not an array of streams if list(node.desc(sdfg).shape) == [1]: node.desc(sdfg).sink = arrnode.data # For memlet generation arrnode.desc( sdfg).src = node.data # TODO: Move src/sink to node, not array return True return False
def on_node_end(self, sdfg: SDFG, state: SDFGState, node: nodes.AccessNode, outer_stream: CodeIOStream, inner_stream: CodeIOStream, global_stream: CodeIOStream): from dace.codegen.dispatcher import DefinedType # Avoid import loop if is_devicelevel_gpu(sdfg, state, node) or is_devicelevel_fpga( sdfg, state, node): # Only run on host code return desc = node.desc(sdfg) # Obtain a pointer for arrays and scalars ptrname = cpp.ptr(node.data, desc, sdfg, self.codegen) defined_type, _ = self.codegen.dispatcher.defined_vars.get(ptrname) if defined_type == DefinedType.Scalar: ptrname = '&' + ptrname # Create UUID state_id = sdfg.node_id(state) node_id = state.node_id(node) uuid = f'{sdfg.sdfg_id}_{state_id}_{node_id}' # Get optional pre/postamble for instrumenting device data preamble, postamble = '', '' if desc.storage == dtypes.StorageType.GPU_Global: self._setup_gpu_runtime(sdfg, global_stream) preamble, postamble, ptrname = self._generate_copy_to_host( node, desc, ptrname) # Encode runtime shape and strides shape = ', '.join(cpp.sym2cpp(s) for s in desc.shape) strides = ', '.join(cpp.sym2cpp(s) for s in desc.strides) # Write code inner_stream.write(preamble, sdfg, state_id, node_id) inner_stream.write( f'__state->serializer->save({ptrname}, {cpp.sym2cpp(desc.total_size - desc.start_offset)}, ' f'"{node.data}", "{uuid}", {shape}, {strides});\n', sdfg, state_id, node_id) inner_stream.write(postamble, sdfg, state_id, node_id)