Esempio n. 1
0
    def allocate_array(self, sdfg: dace.SDFG, dfg: StateSubgraphView,
                       state_id: int, node: nodes.AccessNode,
                       function_stream: CodeIOStream,
                       declaration_stream: CodeIOStream,
                       allocation_stream: CodeIOStream):
        name = node.data
        nodedesc = node.desc(sdfg)

        # Based on the hardware, the total size must be 16^2
        assert nodedesc.total_size == 16 * 16
        # Majority is detected by the strides of the data
        maj = 'row' if nodedesc.strides[-1] == 1 else 'col'

        # Write a fragment based on the storage type
        if nodedesc.storage == dace.StorageType.TensorCore_Accumulator:
            declaration_stream.write(
                'wmma::fragment<wmma::accumulator, '
                '16, 16, 16, float> {};'.format(name), sdfg, state_id, node)
        else:
            declaration_stream.write(
                'wmma::fragment<wmma::matrix_{mat}, '
                '16, 16, 16, half, wmma::{maj}_major> '
                '{name};'.format(
                    mat=('a' if 'A' in nodedesc.storage.name else 'b'),
                    maj=maj,
                    name=name), sdfg, state_id, node)
Esempio n. 2
0
def is_array_stream_view(sdfg: SDFG, dfg: SDFGState, node: nd.AccessNode):
    """ Test whether a stream is directly connected to an array. """

    # Test all memlet paths from the array. If the path goes directly
    # to/from a stream, construct a stream array view
    all_source_paths = []
    source_paths = []
    all_sink_paths = []
    sink_paths = []
    for e in dfg.in_edges(node):
        src_node = dfg.memlet_path(e)[0].src
        # Append empty path to differentiate between a copy and an array-view
        if isinstance(src_node, nd.CodeNode):
            all_source_paths.append(None)
        # Append path from source node
        if isinstance(src_node, nd.AccessNode) and isinstance(
                src_node.desc(sdfg), dt.Array):
            source_paths.append(src_node)
    for e in dfg.out_edges(node):
        sink_node = dfg.memlet_path(e)[-1].dst

        # Append empty path to differentiate between a copy and an array-view
        if isinstance(sink_node, nd.CodeNode):
            all_sink_paths.append(None)
        # Append path to sink node
        if isinstance(sink_node, nd.AccessNode) and isinstance(
                sink_node.desc(sdfg), dt.Array):
            sink_paths.append(sink_node)

    all_sink_paths.extend(sink_paths)
    all_source_paths.extend(source_paths)

    # Special case: stream can be represented as a view of an array
    if ((len(all_source_paths) > 0 and len(sink_paths) == 1)
            or (len(all_sink_paths) > 0 and len(source_paths) == 1)):
        # TODO: What about a source path?
        arrnode = sink_paths[0]
        # Only works if the stream itself is not an array of streams
        if list(node.desc(sdfg).shape) == [1]:
            node.desc(sdfg).sink = arrnode.data  # For memlet generation
            arrnode.desc(
                sdfg).src = node.data  # TODO: Move src/sink to node, not array
            return True
    return False
Esempio n. 3
0
    def on_node_end(self, sdfg: SDFG, state: SDFGState, node: nodes.AccessNode,
                    outer_stream: CodeIOStream, inner_stream: CodeIOStream,
                    global_stream: CodeIOStream):
        from dace.codegen.dispatcher import DefinedType  # Avoid import loop

        if is_devicelevel_gpu(sdfg, state, node) or is_devicelevel_fpga(
                sdfg, state, node):
            # Only run on host code
            return

        desc = node.desc(sdfg)

        # Obtain a pointer for arrays and scalars
        ptrname = cpp.ptr(node.data, desc, sdfg, self.codegen)
        defined_type, _ = self.codegen.dispatcher.defined_vars.get(ptrname)
        if defined_type == DefinedType.Scalar:
            ptrname = '&' + ptrname

        # Create UUID
        state_id = sdfg.node_id(state)
        node_id = state.node_id(node)
        uuid = f'{sdfg.sdfg_id}_{state_id}_{node_id}'

        # Get optional pre/postamble for instrumenting device data
        preamble, postamble = '', ''
        if desc.storage == dtypes.StorageType.GPU_Global:
            self._setup_gpu_runtime(sdfg, global_stream)
            preamble, postamble, ptrname = self._generate_copy_to_host(
                node, desc, ptrname)

        # Encode runtime shape and strides
        shape = ', '.join(cpp.sym2cpp(s) for s in desc.shape)
        strides = ', '.join(cpp.sym2cpp(s) for s in desc.strides)

        # Write code
        inner_stream.write(preamble, sdfg, state_id, node_id)
        inner_stream.write(
            f'__state->serializer->save({ptrname}, {cpp.sym2cpp(desc.total_size - desc.start_offset)}, '
            f'"{node.data}", "{uuid}", {shape}, {strides});\n', sdfg, state_id,
            node_id)
        inner_stream.write(postamble, sdfg, state_id, node_id)