Ejemplo n.º 1
0
import copy
import dace
import dace.graph.nodes
import numpy as np

# Python version of the SDFG below
# @dace.program
# def reduce_with_strides(A: dace.float64[50, 50], B: dace.float64[25]):
#     B[:] = dace.reduce(lambda a,b: a+b, A[::2, ::2], axis=0,
#                        identity=0)

# Programmatic construction of the SDFG sketched in the comment above:
# a 50x50 input array A and a 25-element output array B.
reduce_with_strides = dace.SDFG('reduce_with_strides')
reduce_with_strides.add_array('A', [50, 50], dace.float64)
reduce_with_strides.add_array('B', [25], dace.float64)

# Single state containing the chain A -> Reduce -> B.
state = reduce_with_strides.add_state()
node_a = state.add_read('A')
node_b = state.add_write('B')
# Sum-reduction over axis 0 with identity 0.
red = state.add_reduce('lambda a,b: a+b', [0], 0)
# Only every second row and every second column of A is fed to the reduction.
state.add_nedge(node_a, red, dace.Memlet.simple('A', '0:50:2, 0:50:2'))
# The reduction result is written to all 25 elements of B.
state.add_nedge(red, node_b, dace.Memlet.simple('B', '0:25'))


def test_strided_reduce():
    """ Runs a copy of the strided-reduction SDFG on random data and checks
        the output against the equivalent NumPy column sum.
    """
    inp = np.random.rand(50, 50)
    out = np.random.rand(25)

    # Work on a deep copy so the module-level SDFG object stays untouched.
    program = copy.deepcopy(reduce_with_strides)
    program(A=inp, B=out)

    expected = inp[::2, ::2].sum(axis=0)
    assert np.allclose(out, expected)
def _make_sdfg(name, storage=dace.dtypes.StorageType.CPU_Heap, isview=False):
    """ Builds a loop SDFG that, for every ``i`` in ``[0, N)``, reduces the
        block ``A[2:N-2, 2:N-2, i:N]`` into a scalar and stores it in ``B[i]``.

        The block reaches the reduction through ``tmp1``, which is either a
        transient that ``body1`` fills by copy, or (``isview=True``) a view
        that uses the strides of ``A`` and is connected to ``A`` in ``body2``.

        :param name: name of the created SDFG.
        :param storage: storage type used for ``tmp1`` and ``tmp2``; GPU and
                        FPGA global storage also select a matching reduction
                        implementation.
        :param isview: if True, ``tmp1`` is added as a view instead of a
                       transient.
        :return: the constructed SDFG.
    """
    N = dace.symbol('N', dtype=dace.int32, integer=True, positive=True)
    i = dace.symbol('i', dtype=dace.int32, integer=True)

    sdfg = dace.SDFG(name)
    _, desc_a = sdfg.add_array('A', [N, N, N], dtype=dace.float64)
    sdfg.add_array('B', [N], dtype=dace.float64)

    # The last dimension of tmp1 shrinks as the loop variable grows.
    tmp1_shape = [N - 4, N - 4, N - i]
    if isview:
        _, desc_tmp1 = sdfg.add_view('tmp1',
                                     tmp1_shape,
                                     dtype=dace.float64,
                                     storage=storage,
                                     strides=desc_a.strides)
    else:
        _, desc_tmp1 = sdfg.add_transient('tmp1',
                                          tmp1_shape,
                                          dtype=dace.float64,
                                          storage=storage)
    sdfg.add_transient('tmp2', [1], dtype=dace.float64, storage=storage)

    # State machine: begin -> guard -> body1 -> body2 -> body3 -> guard,
    # with guard leaving to end once the loop condition fails.
    begin = sdfg.add_state("begin", is_start_state=True)
    guard = sdfg.add_state("guard")
    body1 = sdfg.add_state("body1")
    body2 = sdfg.add_state("body2")
    body3 = sdfg.add_state("body3")
    end = sdfg.add_state("end")

    sdfg.add_edge(begin, guard, dace.InterstateEdge(assignments=dict(i='0')))
    sdfg.add_edge(guard, body1, dace.InterstateEdge(condition=f'i<{N}'))
    sdfg.add_edge(guard, end, dace.InterstateEdge(condition=f'i>={N}'))
    sdfg.add_edge(body1, body2, dace.InterstateEdge())
    sdfg.add_edge(body2, body3, dace.InterstateEdge())
    sdfg.add_edge(body3, guard,
                  dace.InterstateEdge(assignments=dict(i='i+1')))

    if isview:
        # The view is attached to A directly inside body2.
        src_a = body2.add_read('A')
        tmp1_node = body2.add_access('tmp1')
        body2.add_nedge(src_a, tmp1_node,
                        dace.Memlet(f'A[2:{N}-2, 2:{N}-2, i:{N}]'))
    else:
        # body1 copies the block of A into the transient; body2 reads it.
        src_a = body1.add_read('A')
        tmp1_copy = body1.add_write('tmp1')
        body1.add_nedge(src_a, tmp1_copy,
                        dace.Memlet(f'A[2:{N}-2, 2:{N}-2, i:{N}]'))
        tmp1_node = body2.add_read('tmp1')

    # body2: reduce all of tmp1 into tmp2[0].
    reduce_node = standard.Reduce(wcr='lambda a, b : a + b', identity=0)
    storage_types = dace.dtypes.StorageType
    if storage == storage_types.GPU_Global:
        reduce_node.implementation = 'CUDA (device)'
    elif storage == storage_types.FPGA_Global:
        reduce_node.implementation = 'FPGAPartialReduction'
    body2.add_node(reduce_node)
    tmp2_out = body2.add_write('tmp2')
    body2.add_nedge(tmp1_node, reduce_node,
                    dace.Memlet.from_array('tmp1', desc_tmp1))
    body2.add_nedge(reduce_node, tmp2_out, dace.Memlet('tmp2[0]'))

    # body3: store the reduced value in B[i].
    tmp2_in = body3.add_read('tmp2')
    b_out = body3.add_write('B')
    body3.add_nedge(tmp2_in, b_out, dace.Memlet('B[i]'))

    return sdfg
Ejemplo n.º 3
0
def get_property_metdata():
    """ Generate a dictionary of class properties and their metadata.

        Walks over every class registered as serializable in DaCe's
        serialization module, picks those that carry a ``__properties__``
        attribute and assembles the JSON metadata of each property into a
        dictionary keyed by type name (or by full class path for library
        nodes).

        The result also contains two special entries:
        ``__reverse_type_lookup__`` maps each metatype (and each enum found in
        ``dace.dtypes``) to one metadata entry, and ``__libs__`` maps the
        registered name of every library node to its metadata key.

        :return: a dictionary ``{'metaDict': <metadata dictionary>}``.
    """
    # Deferred import keeps module load time down.
    from dace.sdfg.nodes import full_class_path

    # Transformations only show up in `dace.serialize._DACE_SERIALIZE_TYPES`
    # once their properties decorator has run. Querying the pattern matches of
    # a dummy SDFG is the simplest way to trigger that. This is expected to
    # run once per SDFG editor, so it adds no recurring overhead (unlike
    # sending transformation metadata along with `get_transformations`).
    from dace.transformation import optimizer
    optimizer.Optimizer(dace.SDFG('dummy')).get_pattern_matches()

    def _choice_names(enum_cls):
        """ Short names of all enum members, leaving out 'Undefined'. """
        short_names = (str(member).split('.')[-1] for member in enum_cls)
        return [short for short in short_names if short != 'Undefined']

    reverse_lookup = {}
    libs = {}
    meta_dict = {
        '__reverse_type_lookup__': reverse_lookup,
        '__libs__': libs,
    }

    for typename, t in dace.serialize._DACE_SERIALIZE_TYPES.items():
        if not hasattr(t, '__properties__'):
            continue

        # Library nodes (other than the base class itself) are keyed by their
        # full class path instead of the registered type name.
        is_library_node = (issubclass(t, dace.sdfg.nodes.LibraryNode)
                           and t != dace.sdfg.nodes.LibraryNode)
        meta_key = full_class_path(t) if is_library_node else typename

        type_meta = {}
        meta_dict[meta_key] = type_meta

        implementations = None
        if hasattr(t, 'implementations'):
            implementations = list(t.implementations.keys())

        for propname, prop in t.__properties__.items():
            prop_meta = prop.meta_to_json(prop)
            type_meta[propname] = prop_meta

            if hasattr(prop, 'key_type') and hasattr(prop, 'value_type'):
                # Dictionary properties also expose key and value types.
                prop_meta['key_type'] = prop.key_type.__name__
                prop_meta['value_type'] = prop.value_type.__name__
            elif hasattr(prop, 'element_type'):
                prop_meta['element_type'] = prop.element_type.__name__

            if prop.choices is not None:
                # Enum-valued properties list their members as choices.
                if (inspect.isclass(prop.choices)
                        and issubclass(prop.choices, aenum.Enum)):
                    prop_meta['choices'] = _choice_names(prop.choices)
            elif propname == 'implementation' and implementations is not None:
                # Implementation properties offer all library implementations.
                prop_meta['choices'] = implementations

            # Remember the first entry seen for each metatype. This gives
            # access to meta information for things other than properties
            # (types, CodeBlocks, etc.).
            meta_type = prop_meta['metatype']
            if meta_type and meta_type not in reverse_lookup:
                reverse_lookup[meta_type] = prop_meta

        # Make every library node discoverable through '__libs__'.
        if is_library_node:
            libs[typename] = meta_key

    # Add a lookup entry for every enum in dace.dtypes that is still missing.
    for enum_name, enum_cls in inspect.getmembers(dace.dtypes,
                                                  inspect.isclass):
        if not issubclass(enum_cls, aenum.Enum):
            continue
        if enum_name in reverse_lookup:
            continue
        reverse_lookup[enum_name] = {
            'category': 'General',
            'metatype': enum_name,
            'choices': _choice_names(enum_cls),
        }

    return {
        'metaDict': meta_dict,
    }
from __future__ import print_function

import argparse
import dace
import numpy as np
from typing import List

import time

# Define symbolic sizes for arbitrary inputs
F = dace.symbol('F')
G = dace.symbol('G')
H = dace.symbol('H')

# NOTE(review): the SDFG name is spelled 'MatrixMultipy'. It is a runtime
# identifier, so it is left untouched here.
sdfg = dace.SDFG('MatrixMultipy')

# Define data type to use
# Complex number, represented by two 64-bit floats (real and imaginary components)
dtype = dace.complex128
# Matching NumPy dtype.
np_dtype = np.complex128

# A = np.array([[1+2j,2+4j,3+6j], [4+2j,5+4j,6+6j]])
# B = np.array([[1+2j,1+4j,1+6j], [0+2j,1+4j,0+6j]])

# Start timestamp for timing the multiplication on the CPU. The matching
# stop time is not visible in this excerpt.
tic = time.time()

#####################################################################
# Data-centric functions

# # Map-Reduce version of matrix multiplication
Ejemplo n.º 5
0
def make_backward_function(model: ONNXModel,
                           apply_strict=False
                           ) -> Type[torch.autograd.Function]:
    """ Convert an ONNXModel to a PyTorch differentiable function. This method should not be used on its own.
        Instead use the ``backward=True`` parameter of :class:`daceml.pytorch.DaceModule`.

        The forward SDFG is modified in place: every array the backward pass needs is made non-transient so it can
        be returned, and scalars are copied into size-1 arrays because scalars cannot be returned from SDFGs.

        :param model: the model to convert.
        :param apply_strict: whether to apply strict transformations before creating the backward pass.
        :return: the PyTorch compatible :class:`torch.autograd.Function`.
        :raises AutoDiffException: if the forward SDFG does not contain exactly one state, or if an array required
                                   by the backward pass is missing from the forward SDFG.
    """

    if len(model.sdfg.nodes()) != 1:
        raise AutoDiffException(
            "Expected to find exactly one SDFGState, found {}".format(
                len(model.sdfg.nodes())))

    forward_sdfg = model.sdfg
    forward_state = model.sdfg.nodes()[0]

    backward_sdfg = dace.SDFG(forward_sdfg.name + "_backward")
    backward_state = backward_sdfg.add_state()

    gen = BackwardPassGenerator(
        sdfg=forward_sdfg,
        state=forward_state,
        given_gradients=[clean_onnx_name(name) for name in model.outputs],
        required_gradients=[clean_onnx_name(name) for name in model.inputs],
        backward_sdfg=backward_sdfg,
        backward_state=backward_state,
        apply_strict=apply_strict)

    backward_result, backward_grad_arrays, backward_input_arrays = gen.backward(
    )

    # maps the name of each forwarded scalar to the size-1 array that carries its value out of the forward SDFG
    replaced_scalars = {}
    for name, desc in backward_input_arrays.items():
        if name not in forward_sdfg.arrays:
            raise AutoDiffException(
                "Expected to find array with name '{}' in SDFG".format(name))

        forward_desc = forward_sdfg.arrays[name]
        # we will save this output and pass it to the backward pass

        # Views should not be forwarded. Instead the backward pass generator should forward the source of the view,
        # and rebuild the sequence of required views in the backward pass.
        assert type(forward_desc) is not dt.View
        if isinstance(forward_desc, dt.Scalar):
            # we can't return scalars from SDFGs, so we add a copy to an array of size 1
            fwd_arr_name, _ = forward_sdfg.add_array(
                name + "_array", [1],
                forward_desc.dtype,
                transient=False,
                storage=forward_desc.storage,
                find_new_name=True)
            bwd_arr_name, _ = backward_sdfg.add_array(
                name + "_array", [1],
                forward_desc.dtype,
                transient=False,
                storage=forward_desc.storage,
                find_new_name=True)
            # the scalar itself becomes internal to the backward SDFG; it is filled from the size-1 array
            backward_sdfg.arrays[name].transient = True

            fwd_copy_state = forward_sdfg.add_state_after(forward_state,
                                                          label="copy_out_" +
                                                          fwd_arr_name)
            bwd_copy_state = backward_sdfg.add_state_before(backward_state,
                                                            label="copy_in_" +
                                                            bwd_arr_name)
            fwd_copy_state.add_edge(fwd_copy_state.add_read(name), None,
                                    fwd_copy_state.add_write(fwd_arr_name),
                                    None, dace.Memlet(name + "[0]"))

            bwd_copy_state.add_edge(bwd_copy_state.add_read(bwd_arr_name),
                                    None, bwd_copy_state.add_write(name), None,
                                    dace.Memlet(name + "[0]"))
            replaced_scalars[name] = fwd_arr_name
        else:
            forward_sdfg.arrays[name].transient = False

    backward_sdfg.validate()

    class DaceFunction(torch.autograd.Function):
        """ Autograd function that runs the forward SDFG of the model in ``forward`` and the generated backward
            SDFG in ``backward``.
        """
        _backward_sdfg = backward_sdfg
        _forward_model = model
        _backward_result = backward_result

        @staticmethod
        def forward(ctx, *inputs):
            """ Run the forward SDFG and stash the arrays needed by the backward pass on ``ctx``. """
            # setup the intermediate buffers

            if any(not inp.is_contiguous() for inp in inputs):
                log.warning("forced to copy input since it was not contiguous")

            # ``is_contiguous`` must be called: the bound method object itself is always truthy, which would
            # skip the copy for non-contiguous inputs.
            copied_inputs = tuple(
                inp if inp.is_contiguous() else inp.contiguous()
                for inp in inputs)

            # prepare the arguments
            inputs, params, symbols, outputs = model._call_args(
                args=copied_inputs, kwargs={})

            # create the empty tensors we need for the intermediate values
            for inp, val in backward_input_arrays.items():
                if isinstance(val, dt.Scalar):
                    # the value we need is actually in an array
                    inp = replaced_scalars[inp]

                if inp not in inputs and inp not in outputs and inp not in params:
                    inputs[inp] = create_output_array(symbols,
                                                      forward_sdfg.arrays[inp],
                                                      use_torch=True)

            DaceFunction._forward_model.sdfg(**inputs, **symbols, **params,
                                             **outputs)

            def _get_arr(name, desc):
                """ Look up the tensor for ``name`` among inputs, outputs and params (in that order). """
                if isinstance(desc, dt.Scalar):
                    name = replaced_scalars[name]
                if name in inputs:
                    value = inputs[name]
                elif name in outputs:
                    value = outputs[name]
                elif name in params:
                    value = params[name]
                else:
                    raise AutoDiffException(
                        f"Could not get value of array {name}")

                return value

            # save the arrays we need for the backward pass
            backward_inputs = {
                name: _get_arr(name, desc)
                for name, desc in backward_input_arrays.items()
            }
            # the backward SDFG expects forwarded scalars under the name of their size-1 array
            for name in replaced_scalars:
                backward_inputs[replaced_scalars[name]] = backward_inputs[name]
                del backward_inputs[name]
            ctx.dace_backward_inputs = backward_inputs
            ctx.dace_symbols = symbols

            if len(outputs) == 1:
                return next(iter(outputs.values()))

            return tuple(outputs.values())

        @staticmethod
        def backward(ctx, *grads):
            """ Run the backward SDFG on the incoming output gradients and return the input gradients.

                :raises ValueError: if the number of gradients does not match the number of model outputs, or
                                    if a gradient is not a tensor.
            """
            backward_inputs = ctx.dace_backward_inputs

            if len(grads) != len(model.outputs):
                raise ValueError("Expected to receive {} grads, got {}".format(
                    len(model.outputs), len(grads)))

            given_grads = dict(
                zip((DaceFunction._backward_result.given_grad_names[
                    clean_onnx_name(outp)] for outp in model.outputs), grads))
            for name, value in given_grads.items():
                if not isinstance(value, torch.Tensor):
                    raise ValueError(
                        "Unsupported input with type {};"
                        " currently only tensor inputs are supported".format(
                            type(value)))
                if not value.is_contiguous():
                    log.warning(
                        "forced to copy input since it was not contiguous")
                    # replacing the value of an existing key is safe while iterating over the dict
                    given_grads[name] = value.contiguous()

            # these are the grads we will calculate
            input_grad_names = [
                DaceFunction._backward_result.required_grad_names[
                    clean_onnx_name(inp)] for inp in model.inputs
            ]

            # init the grads we will calculate with zeros
            grad_values = OrderedDict()
            for name in input_grad_names:
                grad_values[name] = create_output_array(
                    ctx.dace_symbols,
                    backward_grad_arrays[name],
                    use_torch=True,
                    zeros=True)

            DaceFunction._backward_sdfg(**grad_values, **backward_inputs,
                                        **given_grads)

            return tuple(grad_values.values())

    return DaceFunction
Ejemplo n.º 6
0
def mapfission_sdfg():
    """ Builds and validates the 'mapfission' test SDFG.

        One outer map over ``i`` in ``0:2`` contains four producers feeding
        the tasklet ``four``: tasklet ``one``, an inner map around tasklet
        ``two`` (writing ``s2``), an inner map around tasklet ``three``
        (writing ``s3out``) and the input-less tasklet ``scalar``. The result
        of ``four`` is written to ``B[i]``.

        :return: the validated SDFG.
    """
    sdfg = dace.SDFG('mapfission')
    sdfg.add_array('A', [4], dace.float64)
    sdfg.add_array('B', [2], dace.float64)
    sdfg.add_scalar('scal', dace.float64, transient=True)
    sdfg.add_scalar('s1', dace.float64, transient=True)
    sdfg.add_transient('s2', [2], dace.float64)
    sdfg.add_transient('s3out', [1], dace.float64)
    state = sdfg.add_state()

    # Shorthand for building memlets from a data name and a subset string.
    subset = dace.Memlet.simple

    # Nodes
    a_in = state.add_read('A')
    outer_entry, outer_exit = state.add_map('outer', {'i': '0:2'})
    task_one = state.add_tasklet('one', {'a'}, {'b'}, 'b = a[0] + a[1]')
    inner2_entry, inner2_exit = state.add_map('inner', {'j': '0:2'})
    task_two = state.add_tasklet('two', {'a'}, {'b'}, 'b = a * 2')
    s2_node = state.add_access('s2')
    s3_node = state.add_access('s3out')
    inner3_entry, inner3_exit = state.add_map('inner', {'j': '0:2'})
    task_three = state.add_tasklet('three', {'a'}, {'b'}, 'b = a[0] * 3')
    task_scalar = state.add_tasklet('scalar', {}, {'out'}, 'out = 5.0')
    task_four = state.add_tasklet(
        'four', {'ione', 'itwo', 'ithree', 'sc'}, {'out'},
        'out = ione + itwo[0] * itwo[1] + ithree + sc')
    b_out = state.add_write('B')

    # Edges
    # An empty memlet connects the input-less tasklet to the outer map entry.
    state.add_nedge(outer_entry, task_scalar, dace.Memlet())
    state.add_memlet_path(a_in, outer_entry, task_one,
                          memlet=subset('A', '2*i:2*i+2'),
                          dst_conn='a')
    state.add_memlet_path(a_in, outer_entry, inner2_entry, task_two,
                          memlet=subset('A', '2*i+j'),
                          dst_conn='a')
    state.add_memlet_path(task_two, inner2_exit, s2_node,
                          memlet=subset('s2', 'j'),
                          src_conn='b')
    state.add_memlet_path(a_in, outer_entry, inner3_entry, task_three,
                          memlet=subset('A', '2*i:2*i+2'),
                          dst_conn='a')
    state.add_memlet_path(task_three, inner3_exit, s3_node,
                          memlet=subset('s3out', '0'),
                          src_conn='b')

    # Gather all four partial results in the final tasklet.
    state.add_edge(task_one, 'b', task_four, 'ione', subset('s1', '0'))
    state.add_edge(s2_node, None, task_four, 'itwo', subset('s2', '0:2'))
    state.add_edge(s3_node, None, task_four, 'ithree', subset('s3out', '0'))
    state.add_edge(task_scalar, 'out', task_four, 'sc', subset('scal', '0'))
    state.add_memlet_path(task_four, outer_exit, b_out,
                          memlet=subset('B', 'i'),
                          src_conn='out')

    sdfg.validate()
    return sdfg
Ejemplo n.º 7
0
import numpy as np
import dace
from dace.memlet import Memlet

# Create SDFG with a single state: a reduction nested inside a map.
sdfg = dace.SDFG('nested_reduction')
state = sdfg.add_state('a')

# Nodes: 40-element input A, 20-element output B, a map over i in [0, 20)
# and a sum-reduction with identity 0 (no explicit axes are passed).
A = state.add_array('A', (40, ), dace.float32)
B = state.add_array('B', (20, ), dace.float32)
me, mx = state.add_map('mymap', dict(i='0:20'))
red = state.add_reduce('lambda a,b: a+b', None, 0)

# Edges: each map iteration reduces the pair A[2*i:2*i+2] into B[i].
state.add_edge(A, None, me, None, Memlet.simple(A, '0:40'))
state.add_edge(me, None, red, None, Memlet.simple(A, '(2*i):(2*i+2)'))
state.add_edge(red, None, mx, None, Memlet.simple(B, 'i'))
state.add_edge(mx, None, B, None, Memlet.simple(B, '0:20'))
# The edges above use no connector names; let DaCe fill in the map scope
# connectors.
sdfg.fill_scope_connectors()

if __name__ == '__main__':
    print('Nested reduction test')

    Adata = np.random.rand(40).astype(np.float32)
    Bdata = np.random.rand(20).astype(np.float32)
    sdfg(A=Adata, B=Bdata)

    # Reference result: sum of the even-indexed and odd-indexed elements,
    # i.e. Adata[2*i] + Adata[2*i + 1] for every i.
    # NOTE(review): the comparison against Bdata is not visible in this
    # excerpt.
    B_regression = np.zeros(20, dtype=np.float32)
    B_regression[:] = Adata[::2]
    B_regression[:] += Adata[1::2]
Ejemplo n.º 8
0
Archivo: solve.py Proyecto: mfkiwl/dace
def _make_sdfg_getrs(node, parent_state, parent_sdfg, implementation):
    """ Builds a single-state SDFG that chains a ``Getrf`` library node into a
        ``Getrs`` library node (presumably LU factorization followed by the
        solve, following LAPACK naming).

        Inputs ``_ain`` and ``_bin`` are first moved into the transients
        ``_ainout`` and ``_binout``, which the library nodes read and write;
        the result is then moved from ``_binout`` to ``_bout``. With the
        ``'cuSolverDn'`` implementation these moves go through cuBLAS
        ``Transpose`` nodes and ``_binout`` has shape ``[rhs, n]``; otherwise
        they are plain copies and ``_binout`` has shape ``[n, rhs]``.

        :param node: library node being expanded; supplies ``validate`` and
                     ``label``.
        :param parent_state: state containing ``node``.
        :param parent_sdfg: SDFG containing ``parent_state``.
        :param implementation: implementation name set on both library nodes.
        :return: the constructed SDFG.
    """
    (ain_shape, ain_dtype, ain_strides, bin_shape, bin_dtype, bin_strides,
     out_shape, out_dtype, out_strides, n,
     rhs) = node.validate(parent_sdfg, parent_state)
    use_cusolver = implementation == 'cuSolverDn'

    sdfg = dace.SDFG("{l}_sdfg".format(l=node.label))

    # Data containers. Each add_array result is kept as returned so that it
    # can be unpacked straight into Memlet.from_array.
    desc_ain = sdfg.add_array('_ain',
                              ain_shape,
                              dtype=ain_dtype,
                              strides=ain_strides)
    desc_ainout = sdfg.add_array('_ainout', [n, n],
                                 dtype=ain_dtype,
                                 transient=True)
    desc_bin = sdfg.add_array('_bin',
                              bin_shape,
                              dtype=bin_dtype,
                              strides=bin_strides)
    desc_binout = sdfg.add_array('_binout',
                                 [rhs, n] if use_cusolver else [n, rhs],
                                 dtype=out_dtype,
                                 transient=True)
    desc_bout = sdfg.add_array('_bout',
                               out_shape,
                               dtype=out_dtype,
                               strides=out_strides)
    desc_pivots = sdfg.add_array('_pivots', [n],
                                 dtype=dace.int32,
                                 transient=True)
    desc_info = sdfg.add_array('_info', [1], dtype=dace.int32, transient=True)

    state = sdfg.add_state("{l}_state".format(l=node.label))

    factorize = Getrf('getrf')
    factorize.implementation = implementation
    solve = Getrs('getrs')
    solve.implementation = implementation

    # Access nodes.
    a_in = state.add_read('_ain')
    a_work_pre = state.add_read('_ainout')
    a_work_post = state.add_access('_ainout')
    b_in = state.add_read('_bin')
    b_work_pre = state.add_read('_binout')
    b_work_post = state.add_read('_binout')
    b_out = state.add_access('_bout')

    if use_cusolver:
        # Move data in and out through cuBLAS transposes.
        a_transpose = Transpose('AT', dtype=ain_dtype)
        a_transpose.implementation = 'cuBLAS'
        state.add_edge(a_in, None, a_transpose, '_inp',
                       Memlet.from_array(*desc_ain))
        state.add_edge(a_transpose, '_out', a_work_pre, None,
                       Memlet.from_array(*desc_ainout))
        b_transpose = Transpose('bT', dtype=bin_dtype)
        b_transpose.implementation = 'cuBLAS'
        state.add_edge(b_in, None, b_transpose, '_inp',
                       Memlet.from_array(*desc_bin))
        state.add_edge(b_transpose, '_out', b_work_pre, None,
                       Memlet.from_array(*desc_binout))
        x_transpose = Transpose('XT', dtype=bin_dtype)
        x_transpose.implementation = 'cuBLAS'
        state.add_edge(b_work_post, None, x_transpose, '_inp',
                       Memlet.from_array(*desc_binout))
        state.add_edge(x_transpose, '_out', b_out, None,
                       Memlet.from_array(*desc_bout))
    else:
        # Plain copies into and out of the working buffers.
        state.add_nedge(a_in, a_work_pre, Memlet.from_array(*desc_ain))
        state.add_nedge(b_in, b_work_pre, Memlet.from_array(*desc_bin))
        state.add_nedge(b_work_post, b_out, Memlet.from_array(*desc_bout))

    pivots = state.add_access('_pivots')
    factorize_info = state.add_write('_info')
    solve_info = state.add_write('_info')

    # (source, destination, connector keyword, data container) for every
    # memlet path around the two library nodes, in insertion order.
    paths = [
        (a_work_pre, factorize, dict(dst_conn="_xin"), desc_ainout),
        (factorize, factorize_info, dict(src_conn="_res"), desc_info),
        (factorize, pivots, dict(src_conn="_ipiv"), desc_pivots),
        (factorize, a_work_post, dict(src_conn="_xout"), desc_ainout),
        (a_work_post, solve, dict(dst_conn="_a"), desc_ainout),
        (b_work_pre, solve, dict(dst_conn="_rhs_in"), desc_binout),
        (pivots, solve, dict(dst_conn="_ipiv"), desc_pivots),
        (solve, solve_info, dict(src_conn="_res"), desc_info),
        (solve, b_work_post, dict(src_conn="_rhs_out"), desc_binout),
    ]
    for src, dst, connector, container in paths:
        state.add_memlet_path(src,
                              dst,
                              memlet=Memlet.from_array(*container),
                              **connector)

    return sdfg
Ejemplo n.º 9
0
import dace
import numpy as np

# SDFG with a single state that performs two independent chains of strided,
# tiled array-to-array copies.
sr = dace.SDFG('stiledcopy')
s0 = sr.add_state('s0')

# First chain: A -> B -> C
A = s0.add_array('A', [2, 16, 4], dace.float32)
B = s0.add_array('B', [4], dace.float32)
C = s0.add_array('C', [2, 16, 4], dace.float32)

# Second chain: D -> E -> F
D = s0.add_array('D', [128, 128], dace.float32)
E = s0.add_array('E', [8, 8], dace.float32)
F = s0.add_array('F', [128, 128], dace.float32)

# Reading A at [1, 0:10:8:2, 3] into B, then writing B back to the same
# subset of C (presumably start:end:stride:tile -- TODO confirm against the
# DaCe range syntax).
s0.add_nedge(A, B, dace.Memlet.simple(A, '1, 0:10:8:2, 3'))
s0.add_nedge(B, C, dace.Memlet.simple(C, '1, 0:10:8:2, 3'))

# Emulate a blocked tiled matrix multiplication pattern
s0.add_nedge(D, E, dace.Memlet.simple(D, '8:76:64:4,4:72:64:4'))
s0.add_nedge(E, F, dace.Memlet.simple(F, '8:76:64:4,4:72:64:4'))

if __name__ == '__main__':
    print('Strided range copy tasklet test')
    # NOTE: these assignments rebind the module-level names A-F from access
    # nodes to NumPy input buffers of matching shape.
    A = np.random.rand(2, 16, 4).astype(np.float32)
    B = np.random.rand(4).astype(np.float32)
    C = np.random.rand(2, 16, 4).astype(np.float32)
    D = np.random.rand(128, 128).astype(np.float32)
    E = np.random.rand(8, 8).astype(np.float32)
    F = np.random.rand(128, 128).astype(np.float32)
Ejemplo n.º 10
0
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
#
# This sample shows adding a constant integer value to a stream of integers.
#
# It is intended for running hardware_emulation or hardware xilinx targets.

import dace
import numpy as np

# Symbolic length shared by all arrays below
N = dace.symbol('N')

# add sdfg
sdfg = dace.SDFG('add_fortytwo')

# add state
state = sdfg.add_state('device_state')

# Host-side arrays A and B (CPU heap) ...
sdfg.add_array('A', [N], dtype=dace.int32, storage=dace.StorageType.CPU_Heap)
sdfg.add_array('B', [N], dtype=dace.int32, storage=dace.StorageType.CPU_Heap)
# ... and transient counterparts of the same shape in FPGA global memory.
sdfg.add_array('fpga_A', [N],
               dtype=dace.int32,
               transient=True,
               storage=dace.StorageType.FPGA_Global)
sdfg.add_array('fpga_B', [N],
               dtype=dace.int32,
               transient=True,
               storage=dace.StorageType.FPGA_Global)

# add streams
Ejemplo n.º 11
0
# Add the cuBLAS library to the CPU compiler's link libraries. The library
# file name differs between Windows ('nt') and other platforms.
if os.name == 'nt':
    dp.Config.append('compiler', 'cpu', 'libs', value='cublas.lib')
else:
    dp.Config.append('compiler', 'cpu', 'libs', value='libcublas.so')
######################################################################

# Create symbols and give them concrete values
M = dp.symbol('M')
K = dp.symbol('K')
N = dp.symbol('N')
M.set(25)
K.set(26)
N.set(27)

# Create a GPU SDFG with a custom C++ tasklet
sdfg = dp.SDFG('cublastest')
state = sdfg.add_state()

# Add arrays: A is M x K, B is K x N, C is M x N
sdfg.add_array('A', [M, K], dtype=dp.float64)
sdfg.add_array('B', [K, N], dtype=dp.float64)
sdfg.add_array('C', [M, N], dtype=dp.float64)

# Add transient GPU arrays (same shapes as A, B and C, in GPU global memory)
sdfg.add_transient('gA', [M, K], dp.float64, dp.StorageType.GPU_Global)
sdfg.add_transient('gB', [K, N], dp.float64, dp.StorageType.GPU_Global)
sdfg.add_transient('gC', [M, N], dp.float64, dp.StorageType.GPU_Global)

# Add custom C++ tasklet to graph
tasklet = state.add_tasklet(
    # Tasklet name (can be arbitrary)
Ejemplo n.º 12
0
def test_duplicate_codegen():
    """ Builds a small SDFG by hand in which the output of one tasklet (via
        access node ``D``) feeds two further tasklets, runs it, and checks
        both results.

        The statement order below is deliberate: it determines the node
        ordering in the graph, which is what this test depends on.
    """

    # Unfortunately I have to generate this graph manually, as doing it with the python
    # frontend wouldn't result in the node ordering that we want

    sdfg = dace.SDFG("dup")
    state = sdfg.add_state()

    # c_task copies C into D; e_task and f_task both consume D.
    c_task = state.add_tasklet("c_task",
                               inputs={"c"},
                               outputs={"d"},
                               code='d = c')
    e_task = state.add_tasklet("e_task",
                               inputs={"a", "d"},
                               outputs={"e"},
                               code="e = a + d")
    f_task = state.add_tasklet("f_task",
                               inputs={"b", "d"},
                               outputs={"f"},
                               code="f = b + d")

    # Six single-element float32 arrays.
    _, A_arr = sdfg.add_array("A", [
        1,
    ], dace.float32)
    _, B_arr = sdfg.add_array("B", [
        1,
    ], dace.float32)
    _, C_arr = sdfg.add_array("C", [
        1,
    ], dace.float32)
    _, D_arr = sdfg.add_array("D", [
        1,
    ], dace.float32)
    _, E_arr = sdfg.add_array("E", [
        1,
    ], dace.float32)
    _, F_arr = sdfg.add_array("F", [
        1,
    ], dace.float32)
    A = state.add_read("A")
    B = state.add_read("B")
    C = state.add_read("C")
    D = state.add_access("D")
    E = state.add_write("E")
    F = state.add_write("F")

    state.add_edge(C, None, c_task, "c", Memlet.from_array("C", C_arr))
    state.add_edge(c_task, "d", D, None, Memlet.from_array("D", D_arr))

    state.add_edge(A, None, e_task, "a", Memlet.from_array("A", A_arr))
    state.add_edge(B, None, f_task, "b", Memlet.from_array("B", B_arr))
    # D is connected to f_task before e_task.
    state.add_edge(D, None, f_task, "d", Memlet.from_array("D", D_arr))
    state.add_edge(D, None, e_task, "d", Memlet.from_array("D", D_arr))

    # Both outputs are written with a summing write-conflict resolution.
    state.add_edge(e_task, "e", E, None,
                   Memlet.from_array("E", E_arr, wcr="lambda x, y: x + y"))
    state.add_edge(f_task, "f", F, None,
                   Memlet.from_array("F", F_arr, wcr="lambda x, y: x + y"))

    # From here on the names A-F refer to the NumPy buffers passed to the SDFG.
    A = np.array([1], dtype=np.float32)
    B = np.array([1], dtype=np.float32)
    C = np.array([1], dtype=np.float32)
    D = np.array([1], dtype=np.float32)
    E = np.zeros_like(A)
    F = np.zeros_like(A)

    sdfg(A=A, B=B, C=C, D=D, E=E, F=F)

    # E and F start at zero, so each ends up as 0 + (1 + 1).
    assert E[0] == 2
    assert F[0] == 2
Ejemplo n.º 13
0
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
"""
    Two sequential RTL tasklets connected through a memlet.
"""

import dace
import argparse

import numpy as np

# add sdfg
sdfg = dace.SDFG('rtl_multi_tasklet')

# add state
state = sdfg.add_state()

# add arrays
sdfg.add_array('A', [1], dtype=dace.int32)
sdfg.add_array('B', [1], dtype=dace.int32)
sdfg.add_array('C', [1], dtype=dace.int32)

# add custom cpp tasklet
tasklet0 = state.add_tasklet(name='rtl_tasklet0',
                             inputs={'a'},
                             outputs={'b'},
                             code="""\
typedef enum [1:0] {READY, BUSY, DONE} state_e;
state_e state;

always@(posedge ap_aclk) begin
    if (ap_areset) begin // case: reset
Ejemplo n.º 14
0
def make_sdfg():
    """Build the nested SDFG fixture used to exercise connector pruning.

    Three SDFGs are nested within each other (outer -> middle -> inner), and
    two input arrays and two output arrays are fed through the hierarchy. Only
    the ``*_used_*`` input/output pair is touched by the tasklet in the
    innermost SDFG; the ``*_unused_*`` pair is merely forwarded and can thus
    be removed in all nestings.

    In addition, the outer state contains a map around a second nested SDFG
    ("isolated") whose only connectors are an unused read and an unused
    write, so that pruning them would orphan the map entry and exit.

    Returns:
        The outermost SDFG, named ``prune_connectors_test``.
    """

    n = dace.symbol("N")

    sdfg_outer = dace.SDFG("prune_connectors_test")
    # Headers required by the C++ tasklet of the isolated nested SDFG below.
    sdfg_outer.set_global_code("#include <fstream>\n#include <mutex>")
    state_outer = sdfg_outer.add_state("state_outer")
    sdfg_outer.add_symbol("N", dace.int32)

    sdfg_middle = dace.SDFG("middle")
    sdfg_middle.add_symbol("N", dace.int32)
    nsdfg_middle = state_outer.add_nested_sdfg(
        sdfg_middle,
        sdfg_outer, {"read_used_middle", "read_unused_middle"},
        {"write_used_middle", "write_unused_middle"},
        name="middle")
    state_middle = sdfg_middle.add_state("middle")

    entry_middle, exit_middle = state_middle.add_map("map_middle",
                                                     {"i": "0:N"})

    sdfg_inner = dace.SDFG("inner")
    sdfg_inner.add_symbol("N", dace.int32)
    nsdfg_inner = state_middle.add_nested_sdfg(
        sdfg_inner,
        sdfg_middle, {"read_used_inner", "read_unused_inner"},
        {"write_used_inner", "write_unused_inner"},
        name="inner")
    state_inner = sdfg_inner.add_state("inner")

    entry_inner, exit_inner = state_inner.add_map("map_inner", {"j": "0:N"})
    tasklet = state_inner.add_tasklet("tasklet", {"read_tasklet"},
                                      {"write_tasklet"},
                                      "write_tasklet = read_tasklet + 1")

    # Declare and wire both the unused and the used data containers through
    # the outer and middle levels.
    for s in ["unused", "used"]:

        # Read

        sdfg_outer.add_array(f"read_{s}", [n, n], dace.uint16)
        sdfg_outer.add_array(f"read_{s}_outer", [n, n], dace.uint16)
        sdfg_middle.add_array(f"read_{s}_middle", [n, n], dace.uint16)
        sdfg_inner.add_array(f"read_{s}_inner", [n], dace.uint16)

        read_outer = state_outer.add_read(f"read_{s}")
        read_middle = state_middle.add_read(f"read_{s}_middle")

        state_outer.add_memlet_path(read_outer,
                                    nsdfg_middle,
                                    dst_conn=f"read_{s}_middle",
                                    memlet=dace.Memlet(f"read_{s}[0:N, 0:N]"))
        state_middle.add_memlet_path(
            read_middle,
            entry_middle,
            nsdfg_inner,
            dst_conn=f"read_{s}_inner",
            memlet=dace.Memlet(f"read_{s}_middle[i, 0:N]"))

        # Write

        sdfg_outer.add_array(f"write_{s}", [n, n], dace.uint16)
        sdfg_outer.add_array(f"write_{s}_outer", [n, n], dace.uint16)
        sdfg_middle.add_array(f"write_{s}_middle", [n, n], dace.uint16)
        sdfg_inner.add_array(f"write_{s}_inner", [n], dace.uint16)

        write_outer = state_outer.add_write(f"write_{s}")
        write_middle = state_middle.add_write(f"write_{s}_middle")

        state_outer.add_memlet_path(nsdfg_middle,
                                    write_outer,
                                    src_conn=f"write_{s}_middle",
                                    memlet=dace.Memlet(f"write_{s}[0:N, 0:N]"))
        state_middle.add_memlet_path(
            nsdfg_inner,
            exit_middle,
            write_middle,
            src_conn=f"write_{s}_inner",
            memlet=dace.Memlet(f"write_{s}_middle[i, 0:N]"))

    # Only the "used" containers are connected to the innermost tasklet. The
    # names are spelled out explicitly instead of relying on the loop variable
    # still holding its last value after the loop has finished.
    read_inner = state_inner.add_read("read_used_inner")
    write_inner = state_inner.add_write("write_used_inner")

    state_inner.add_memlet_path(read_inner,
                                entry_inner,
                                tasklet,
                                dst_conn="read_tasklet",
                                memlet=dace.Memlet("read_used_inner[j]"))

    state_inner.add_memlet_path(tasklet,
                                exit_inner,
                                write_inner,
                                src_conn="write_tasklet",
                                memlet=dace.Memlet("write_used_inner[j]"))

    # Create mapped nested SDFG where the map entry and exit would be orphaned
    # by pruning the read and write, and must have nedges added to them

    isolated_read = state_outer.add_read("read_unused_outer")
    isolated_write = state_outer.add_write("write_unused_outer")
    isolated_sdfg = dace.SDFG("isolated_sdfg")
    isolated_nsdfg = state_outer.add_nested_sdfg(isolated_sdfg,
                                                 sdfg_outer,
                                                 {"read_unused_isolated"},
                                                 {"write_unused_isolated"},
                                                 name="isolated")
    # Forward the map iterator "i" into the nested SDFG as a symbol.
    isolated_sdfg.add_symbol("i", dace.int32)
    isolated_nsdfg.symbol_mapping["i"] = "i"
    isolated_entry, isolated_exit = state_outer.add_map(
        "isolated", {"i": "0:N"})
    state_outer.add_memlet_path(
        isolated_read,
        isolated_entry,
        isolated_nsdfg,
        dst_conn="read_unused_isolated",
        memlet=dace.Memlet("read_unused_outer[0:N, 0:N]"))
    state_outer.add_memlet_path(
        isolated_nsdfg,
        isolated_exit,
        isolated_write,
        src_conn="write_unused_isolated",
        memlet=dace.Memlet("write_unused_outer[0:N, 0:N]"))
    isolated_state = isolated_sdfg.add_state("isolated")
    # The tasklet has no connectors; it appends the value of "i" to a text
    # file while holding a static mutex.
    isolated_state.add_tasklet("isolated", {}, {},
                               """\
static std::mutex mutex;
std::unique_lock<std::mutex> lock(mutex);
std::ofstream of("prune_connectors_test.txt", std::ofstream::app);
of << i << "\\n";""",
                               language=dace.Language.CPP)

    return sdfg_outer
Ejemplo n.º 15
0
    # Set up map that only has one exit
    _, me, mx = state.add_mapped_tasklet(
        'boundary',
        dict(i='%s:%s' % (y0, y0 + height), j='%s:%s' % (x0, x0 + width)), {},
        '''b = %f''' % initval,
        dict(b=dace.Memlet.simple(B.data, 'i,j')),
        external_edges=False)
    state.add_nedge(
        mx, B,
        dace.Memlet.simple(B.data,
                           '%s:%s, %s:%s' % (y0, y0 + height, x0, x0 + width)))


#################

sdfg = dace.SDFG('stencilboundaries')

# Add arrays and kernel
sdfg.add_array('A', [H, W], dace.float32)
sdfg.add_array('B', [H, W], dace.float32)
sdfg.add_constants({'KERNEL': STENCIL_KERNEL})

mainstate = sdfg.add_state()

# The 7x7 stencil
_, me, mx = mainstate.add_mapped_tasklet(
    'stencil',
    dict(i='3:H-3', j='3:W-3'),
    dict(a=dace.Memlet.simple('A', 'i-3:i+4, j-3:j+4')),
    '''
b = 0
Ejemplo n.º 16
0
def make_sdfg(implementation,
              dtype,
              storage=dace.StorageType.Default,
              data_layout='CCC'):
    """Build an SDFG that multiplies ``x`` (m x k) by ``y`` (k x n).

    Args:
        implementation: Not referenced anywhere in this function body.
        dtype: Element type of all arrays and of the MatMul library node;
            its ``type.__name__`` becomes part of the SDFG name.
        storage: Storage type of the arrays the MatMul node operates on. For
            any value other than ``StorageType.Default`` those arrays become
            "_device"-suffixed transients and two extra states copy the
            operands in from, and the result back out to, the plain arrays.
        data_layout: Three-character string with either ``C`` (row major) or
            ``F`` (column major) for x, y, and the result respectively.

    Returns:
        The constructed SDFG.
    """
    m, n, k = (dace.symbol(name) for name in ("m", "n", "k"))

    on_device = storage != dace.StorageType.Default
    suffix = "_device" if on_device else ""

    sdfg = dace.SDFG("mm_{}_{}".format(dtype.type.__name__, data_layout))
    compute = sdfg.add_state("dataflow")

    # (base name, shape, strides) of every operand, with the strides chosen
    # from the matching character of the layout string.
    operands = (
        ("x", [m, k], (k, 1) if data_layout[0] == 'C' else (1, m)),
        ("y", [k, n], (n, 1) if data_layout[1] == 'C' else (1, k)),
        ("result", [m, n], (n, 1) if data_layout[2] == 'C' else (1, m)),
    )
    for base, shape, strides in operands:
        sdfg.add_array(base + suffix,
                       shape,
                       dtype,
                       storage=storage,
                       transient=on_device,
                       strides=strides)

    x_node = compute.add_read("x" + suffix)
    y_node = compute.add_read("y" + suffix)
    out_node = compute.add_write("result" + suffix)

    matmul = blas.nodes.matmul.MatMul("matmul", dtype)

    compute.add_memlet_path(x_node,
                            matmul,
                            dst_conn="_a",
                            memlet=Memlet.simple(x_node, "0:m, 0:k"))
    compute.add_memlet_path(y_node,
                            matmul,
                            dst_conn="_b",
                            memlet=Memlet.simple(y_node, "0:k, 0:n"))
    compute.add_memlet_path(matmul,
                            out_node,
                            src_conn="_c",
                            memlet=Memlet.simple(out_node, "0:m, 0:n"))

    if not on_device:
        return sdfg

    # Non-default storage: add the plain (unsuffixed) arrays plus the copy
    # states that surround the computation.
    for base, shape in (("x", [m, k]), ("y", [k, n]), ("result", [m, n])):
        sdfg.add_array(base, shape, dtype)

    copy_in = sdfg.add_state("copy_to_device")
    sdfg.add_edge(copy_in, compute, dace.InterstateEdge())

    x_src = copy_in.add_read("x")
    y_src = copy_in.add_read("y")
    x_dst = copy_in.add_write("x" + suffix)
    y_dst = copy_in.add_write("y" + suffix)
    copy_in.add_memlet_path(x_src,
                            x_dst,
                            memlet=Memlet.simple(x_src, "0:m, 0:k"))
    copy_in.add_memlet_path(y_src,
                            y_dst,
                            memlet=Memlet.simple(y_src, "0:k, 0:n"))

    copy_out = sdfg.add_state("copy_to_host")
    sdfg.add_edge(compute, copy_out, dace.InterstateEdge())

    # NOTE(review): the copy source is created with add_write and the copy
    # destination with add_read; kept exactly as found.
    res_src = copy_out.add_write("result" + suffix)
    res_dst = copy_out.add_read("result")
    copy_out.add_memlet_path(res_src,
                             res_dst,
                             memlet=Memlet.simple(res_src, "0:m, 0:n"))

    return sdfg
Ejemplo n.º 17
0
import dace
import numpy as np

# Module-level SDFG exercising state.add_edge_pair: a tasklet that doubles
# its input is placed in a map over 31 indices of A, and its output is
# written to B[0] with a summing write-conflict resolution.
sdfg = dace.SDFG('addedgepair')
state = sdfg.add_state()

# Add nodes
t = state.add_tasklet('do', {'a'}, {'b'}, 'b = 2*a')
a = state.add_array('A', [31], dace.float64)
b = state.add_array('B', [1], dace.float64)
me, mx = state.add_map('m', dict(i='0:31'))

# Add edges
# Input side: map entry `me`, tasklet connector 'a', reading A[i].
state.add_edge_pair(me,
                    t,
                    a,
                    dace.Memlet.simple(a, 'i'),
                    internal_connector='a')
# Output side: map exit `mx`, tasklet connector 'b', writing B[0]; the
# conflict-resolution lambda adds the incoming value to the stored one.
state.add_edge_pair(mx,
                    t,
                    b,
                    dace.Memlet.simple(b, '0', wcr_str='lambda a,b: a+b'),
                    internal_connector='b',
                    scope_connector='o')

if __name__ == '__main__':
    # B starts at zero, so the reference value it is compared with is
    # sum(2 * A).
    A = np.random.rand(31).astype(np.float64)
    B = np.array([0.], dtype=np.float64)
    sdfg(A=A, B=B)

    # Absolute deviation of the computed value from the NumPy reference.
    diff = np.linalg.norm(B[0] - np.sum(2 * A))
Ejemplo n.º 18
0
# Copyright 2019-2020 ETH Zurich and the DaCe authors. All rights reserved.
import copy
import dace
import dace.sdfg.nodes
import numpy as np

# Python version of the SDFG below
# @dace.program
# def reduce_with_offsets(A: dace.float64[50, 50], B: dace.float64[25]):
#     B[4:11] = dace.reduce(lambda a,b: a+b, A[25:50, 13:20], axis=0,
#                           identity=0)

# SDFG with a single state: A -> reduce -> B.
reduce_with_offsets = dace.SDFG('reduce_with_offsets')
reduce_with_offsets.add_array('A', [50, 50], dace.float64)
reduce_with_offsets.add_array('B', [25], dace.float64)

state = reduce_with_offsets.add_state()
node_a = state.add_read('A')
node_b = state.add_write('B')
# Sum-reduction over axis 0 with identity 0.
red = state.add_reduce('lambda a,b: a+b', [0], 0)
# The reduction reads the A[25:50, 13:20] window and writes into B[4:11];
# the remaining entries of B are not covered by the output memlet.
state.add_nedge(node_a, red, dace.Memlet.simple('A', '25:50, 13:20'))
state.add_nedge(red, node_b, dace.Memlet.simple('B', '4:11'))


def test_offset_reduce():
    """Run the offset reduction SDFG and compare it with NumPy.

    The SDFG reduces the ``A[25:50, 13:20]`` window over axis 0 and stores the
    result in ``B[4:11]``; that slice must match ``np.sum`` over the same
    window.
    """
    A = np.random.rand(50, 50)
    B = np.random.rand(25)

    # Work on a copy so the module-level SDFG object is left untouched.
    sdfg = copy.deepcopy(reduce_with_offsets)
    sdfg(A=A, B=B)

    # Only B[4:11] is covered by the output memlet, so only that slice is
    # compared against the reference.
    assert np.allclose(B[4:11], np.sum(A[25:50, 13:20], axis=0))
Ejemplo n.º 19
0
    def expansion(node, parent_state, parent_sdfg):
        """Expand a stencil node into an SDFG holding one mapped tasklet.

        The node's code is rewritten so that every relative-index subscript
        becomes a named memlet, boundary handling is prepended for every
        input access, and the outputs are written back at the end. The
        resulting tasklet is placed in a map over the node's shape.

        Args:
            node: The node being expanded. This function reads its ``label``,
                ``name``, ``shape``, ``code``, ``accesses`` (field name ->
                ``(iterators, accesses)``), ``output_fields`` and
                ``boundary_conditions``.
            parent_state: State containing ``node``; its edges are used to
                look up the data type of every connected field.
            parent_sdfg: SDFG containing ``parent_state``.

        Returns:
            The newly built SDFG, with its parent pointers set to
            ``parent_state`` and ``parent_sdfg``.

        Raises:
            KeyError: If an access listed in ``node.accesses`` does not occur
                in the node's code.
            ValueError: If a boundary condition has an unsupported type.
        """

        sdfg = dace.SDFG(node.label + "_outer")
        state = sdfg.add_state(node.label + "_outer")

        shape = np.array(node.shape)

        # One map parameter per dimension of the iteration space.
        parameters = np.array(["i", "j", "k"])[:len(shape)]

        # Find outer data descriptor
        field_dtype = {}
        for e in parent_state.in_edges(node):
            field = e.dst_conn
            if field in node.accesses:
                field_dtype[field] = parent_sdfg.data(
                    dace.sdfg.find_input_arraynode(parent_state, e).data).dtype
        for e in parent_state.out_edges(node):
            field = e.src_conn
            if field in node.output_fields:
                field_dtype[field] = parent_sdfg.data(
                    dace.sdfg.find_output_arraynode(parent_state,
                                                    e).data).dtype

        #######################################################################
        # Tasklet code generation
        #######################################################################

        code = node.code.as_string

        # Replace relative indices with memlet names
        converter = SubscriptConverter()
        new_ast = converter.visit(ast.parse(code))
        code = astunparse.unparse(new_ast)
        code_memlet_names = converter.names

        #######################################################################
        # Implement boundary conditions
        #######################################################################

        boundary_code = ""
        # Loop over each input
        for field_name, (iterators, accesses) in node.accesses.items():
            if sum(iterators, 0) == 0:
                continue  # Scalar input
            # Loop over each access to this data
            for indices in accesses:
                try:
                    memlet_name = code_memlet_names[field_name][indices]
                except KeyError:
                    # Report the inconsistency to the caller; a leftover
                    # interactive debugger breakpoint used to sit here.
                    raise KeyError("Missing access in code: {}[{}]".format(
                        field_name, ", ".join(map(str, indices))))
                cond = []
                # Loop over each index of this access
                for i, offset in enumerate(indices):
                    if offset < 0:
                        cond.append(parameters[i] + " < " + str(-offset))
                    elif offset > 0:
                        cond.append(parameters[i] + " >= " +
                                    str(shape[i] - offset))
                # Unused below; the lookup still raises KeyError for a field
                # with no recorded data type.
                ctype = field_dtype[field_name]
                if len(cond) == 0:
                    # Access never leaves the domain: forward the input as-is.
                    boundary_code += "{} = {}_in\n".format(
                        memlet_name, memlet_name)
                else:
                    bc = node.boundary_conditions[field_name]
                    btype = bc["btype"]
                    if btype == "copy":
                        # NOTE(review): `center` is not defined in this
                        # function; presumably it is meant to be the
                        # all-zero offset tuple -- confirm it is provided at
                        # module level.
                        center_memlet = code_memlet_names[field_name][center]
                        boundary_val = "_{}".format(center_memlet)
                    elif btype == "constant":
                        boundary_val = bc["value"]
                    elif btype == "shrink":
                        # We don't need to do anything here, it's up to the
                        # user to not use the junk output
                        boundary_val = JUNK_VAL
                    else:
                        raise ValueError(
                            "Unsupported boundary condition type: {}".format(
                                node.boundary_conditions[field_name]["btype"]))
                    boundary_code += ("{} = {} if {} else {}_in\n".format(
                        memlet_name, boundary_val, " or ".join(cond),
                        memlet_name))

        #######################################################################
        # Write all output memlets
        #######################################################################

        # Outputs are written at the zero offset in every dimension.
        origin = tuple(0 for _ in range(len(shape)))
        write_code = "\n".join(
            "{}_out = {}".format(code_memlet_names[output][origin],
                                 code_memlet_names[output][origin])
            for output in node.output_fields)

        code = boundary_code + "\n" + code + "\n" + write_code

        input_memlets = sum(
            [
                ["{}_in".format(c) for c in v.values()]
                for k, v in code_memlet_names.items()
                # Don't include scalar variables
                if k in node.accesses and sum(node.accesses[k][0], 0) > 0
            ],
            [])
        output_memlets = sum(
            [["{}_out".format(c) for c in v.values()]
             for k, v in code_memlet_names.items() if k in node.output_fields],
            [])

        #######################################################################
        # Create tasklet
        #######################################################################

        tasklet = state.add_tasklet(node.label + "_compute",
                                    input_memlets,
                                    output_memlets,
                                    code,
                                    language=dace.dtypes.Language.Python)

        #######################################################################
        # Build dataflow state
        #######################################################################

        entry, exit = state.add_map(
            node.name + "_map",
            collections.OrderedDict((parameters[i], "0:" + str(shape[i]))
                                    for i in range(len(shape))))

        for field in code_memlet_names:

            dtype = field_dtype[field]

            if field in node.accesses:
                read_node = state.add_read(field)
                # Keep only the dimensions this input actually spans.
                input_dims = node.accesses[field][0]
                input_shape = tuple(s for s, v in zip(shape, input_dims) if v)
                sdfg.add_array(field, input_shape, dtype)
                field_parameters = tuple(
                    p for p, v in zip(parameters, input_dims) if v)
                for indices, connector in code_memlet_names[field].items():
                    access_str = ", ".join(
                        "{} + ({})".format(p, i)
                        for p, i in zip(field_parameters, indices))
                    memlet = dace.Memlet.simple(field,
                                                access_str,
                                                num_accesses=-1)
                    # Offsets may point outside the array near the borders;
                    # the generated boundary code selects a replacement value
                    # there.
                    memlet.allow_oob = True
                    state.add_memlet_path(read_node,
                                          entry,
                                          tasklet,
                                          dst_conn=connector + "_in",
                                          memlet=memlet)
            else:
                sdfg.add_array(field, shape, dtype)
                write_node = state.add_write(field)
                for indices, connector in code_memlet_names[field].items():
                    state.add_memlet_path(tasklet,
                                          exit,
                                          write_node,
                                          src_conn=connector + "_out",
                                          memlet=dace.Memlet.simple(
                                              field, ", ".join(parameters)))

        # Add scalars as symbols
        for field_name, (indices, accesses) in node.accesses.items():
            if not any(indices):
                sdfg.add_symbol(field_name, parent_sdfg.symbols[field_name])

        #######################################################################

        sdfg.parent = parent_state
        sdfg._parent_sdfg = parent_sdfg  # TODO: this should not be necessary

        return sdfg
# Copyright 2019-2020 ETH Zurich and the DaCe authors. All rights reserved.
import dace
import numpy as np

# Outer SDFG: one state in which the 'add' tasklet reads A[0] and B[0] and
# writes their sum back to B[0] through a separate access node for B.
sdfg = dace.SDFG('inline_nonsink_access_test')
sdfg.add_array('A', [1], dace.float32)
sdfg.add_array('B', [1], dace.float32)

state = sdfg.add_state()
A = state.add_access('A')
B = state.add_access('B')
B_out = state.add_write('B')
t = state.add_tasklet('add', {'a', 'b'}, {'c'}, 'c = a + b')

state.add_edge(A, None, t, 'a', dace.Memlet.simple('A', '0'))
state.add_edge(B, None, t, 'b', dace.Memlet.simple('B', '0'))
state.add_edge(t, 'c', B_out, None, dace.Memlet.simple('B', '0'))

# Add nested SDFG
nsdfg = dace.SDFG('nested_ina_test')
nsdfg.add_array('C', [1], dace.float32)
nsdfg.add_array('D', [1], dace.float32)

# Nodes of the nested state: three tasklets (init, square, cube), two
# access nodes for C and one write node for D. The edges connecting them
# are not part of this excerpt.
nstate = nsdfg.add_state()
t_init = nstate.add_tasklet('init', {}, {'o'}, 'o = 2')
t_square = nstate.add_tasklet('square', {'i'}, {'o'}, 'o = i * i')
t_cube = nstate.add_tasklet('cube', {'i'}, {'o'}, 'o = i * i * i')
C = nstate.add_access('C')
C2 = nstate.add_access('C')
D = nstate.add_write('D')
Ejemplo n.º 21
0
    def __init__(self, builder):
        """Set up the editor state, the rendering widgets and the toolbar.

        Args:
            builder: UI builder object; widgets are looked up on it by name
                through ``get_object``.
        """
        # (image file, button type, tool name) for each toolbar button, in
        # display order.
        toolbar = (
            ("cursor.png", "mouse", "Mouse"),
            ("delete.png", "delete", "Delete"),
            ("array.png", "node", "Array"),
            ("edge_thin.png", "edge", "Memlet"),
            ("map.png", "node", "Map"),
            ("tasklet.png", "node", "Tasklet"),
            ("stream.png", "node", "Stream"),
            ("stream_map.png", "node", "Consume"),
            ("state.png", "node", "State"),
            ("state_trans.png", "edge", "State Transition"),
            ("edge_head_redir.png", "edge_redir", "Head Redirection"),
            ("edge_tail_redir.png", "edge_redir", "Tail Redirection"),
        )
        self.buttons = [{
            "image": image,
            "type": kind,
            "tool": tool
        } for image, kind, tool in toolbar]

        # The active tool, once set, is one of the dicts in self.buttons.
        self.active_tool = None
        self.builder = builder
        self.current_editing_script = ""
        self.sdfg_changed = False

        # Start from a valid SDFG so that the functions using it never have
        # to deal with None.
        self.sdfg = dace.SDFG("newsdfg", OrderedDict(), {})

        # Pending selections for the edge and edge-redirection tools.
        self.first_selected_node_for_edge = None
        self.first_selected_state_for_edge = None
        self.selected_edge_for_redir = None

        # Graph renderer bound to the editor's drawing area.
        self.rendered_sdfg = RenderedGraph()
        self.sdfg_da = self.builder.get_object("sdfg_editor_da")
        self.rendered_sdfg.set_drawing_area(self.sdfg_da)

        # Property panel; OnSDFGUpdate is handed over as its callback.
        self.propren = PropertyRenderer(
            self.builder.get_object("se_propertylabel"),
            self.builder.get_object("se_propertygrid"), self.OnSDFGUpdate)

        self.image_store = ImageStore()
        self.load_buttons()
        self.connect_signals()
Ejemplo n.º 22
0
# Copyright 2019-2020 ETH Zurich and the DaCe authors. All rights reserved.
import dace as dp
import numpy as np

sdfg = dp.SDFG('fib_consume')
state = sdfg.add_state('state')

# Arrays
initial_value = state.add_array('iv', [1], dp.int32)
stream = state.add_stream('S', dp.int32, transient=True)
stream_init = state.add_stream('S', dp.int32, transient=True)
stream_out = state.add_stream('S', dp.int32, transient=True)
output = state.add_array('res', [1], dp.float32)

# Consume and tasklet
consume_entry, consume_exit = state.add_consume('cons', ('p', '4'))
tasklet = state.add_tasklet(
    'fibonacci', {'s'}, {'sout', 'val'}, """
if s == 1:
    val = 1
elif s > 1:
    sout = s - 1   # Recurse by pushing smaller values
    sout = s - 2
""")

# Edges
state.add_nedge(initial_value, stream_init,
                dp.Memlet.from_array(stream_init.data, stream_init.desc(sdfg)))
state.add_edge(stream, None, consume_entry, 'IN_stream',
               dp.Memlet.from_array(stream.data, stream.desc(sdfg)))
state.add_edge(consume_entry, 'OUT_stream', tasklet, 's',
Ejemplo n.º 23
0
def test_nested_sdfg():
    """Check two directly connected tasklets inside a nested SDFG.

    The inner SDFG multiplies each element by 5 and then by 2, with the two
    tasklets joined by an empty memlet. The outer SDFG wraps it in a map over
    ``k`` in ``0:2``, and the inner map covers ``i`` in ``0:N/2`` at index
    ``k*N/2+i``. The result is compared against ``10 * input`` both before
    and after ``simplify()``.
    """
    print('SDFG consecutive tasklet (nested SDFG) test')

    # Externals (parameters, symbols)
    N = dp.symbol('N')
    N.set(20)
    src = dp.ndarray([N], dp.int32)
    dst = dp.ndarray([N], dp.int32)
    src[:] = dp.int32(5)
    dst[:] = dp.int32(0)

    # Outer SDFG and its data nodes
    mysdfg = SDFG('ctasklet_nested_sdfg')
    outer_state = mysdfg.add_state()
    outer_in = outer_state.add_array('A', [N], dp.int32)
    outer_out = outer_state.add_array('B', [N], dp.int32)

    # Inner SDFG: a -> (x5) -> (x2) -> b inside a map over half the range
    nsdfg = dp.SDFG('ctasklet_nested_sdfg_inner')
    inner_state = nsdfg.add_state()
    inner_in = inner_state.add_array('a', [N], dp.int32)
    inner_out = inner_state.add_array('b', [N], dp.int32)
    inner_entry, inner_exit = inner_state.add_map('mymap', {'i': '0:N/2'})
    times_five = inner_state.add_tasklet('mytasklet', {'aa'}, {'bb'},
                                         'bb = 5*aa')
    inner_state.add_memlet_path(inner_in,
                                inner_entry,
                                times_five,
                                dst_conn='aa',
                                memlet=Memlet('a[k*N/2+i]'))
    times_two = inner_state.add_tasklet('mytasklet2', {'cc'}, {'dd'},
                                        'dd = 2*cc')
    # The two tasklets are connected directly, with an empty memlet.
    inner_state.add_edge(times_five, 'bb', times_two, 'cc', Memlet())
    inner_state.add_memlet_path(times_two,
                                inner_exit,
                                inner_out,
                                src_conn='dd',
                                memlet=Memlet('b[k*N/2+i]'))

    # Outer map around the nested SDFG node
    outer_entry, outer_exit = outer_state.add_map('omap', {'k': '0:2'})
    nested = outer_state.add_nested_sdfg(nsdfg, None, {'a'}, {'b'})
    outer_state.add_memlet_path(outer_in,
                                outer_entry,
                                nested,
                                dst_conn='a',
                                memlet=Memlet('A[0:N]'))
    outer_state.add_memlet_path(nested,
                                outer_exit,
                                outer_out,
                                src_conn='b',
                                memlet=Memlet('B[0:N]'))

    def run_and_check():
        # Execute the SDFG and compare the output with 10 * input.
        mysdfg(A=src, B=dst, N=N)
        diff = np.linalg.norm(10 * src - dst) / N.get()
        print("Difference:", diff)
        assert diff <= 1e-5

    mysdfg.validate()
    run_and_check()

    # The result must be unchanged after simplification.
    mysdfg.simplify()
    run_and_check()
Ejemplo n.º 24
0
    def apply(self, sdfg):
        """Replace the matched map scope by a nested SDFG containing a loop.

        A nested SDFG with ``begin``, ``guard``, ``body`` and ``end`` states
        is created; its inter-state edges iterate the first map parameter
        over the first map range. The contents of the map scope are moved
        into the ``body`` state, the edges that entered and left the map are
        reattached to the new nested SDFG node, and the nodes of the original
        scope are removed from the state.

        Args:
            sdfg: The SDFG on which the transformation is applied;
                ``self.state_id`` and ``self.subgraph`` select the state and
                the map entry inside it.
        """
        # Retrieve map entry and exit nodes.
        graph = sdfg.nodes()[self.state_id]
        map_entry = graph.nodes()[self.subgraph[MapToForLoop._map_entry]]
        map_exits = graph.exit_nodes(map_entry)
        # Only the first parameter and the first range of the map are used.
        loop_idx = map_entry.map.params[0]
        loop_from, loop_to, loop_step = map_entry.map.range[0]

        nested_sdfg = dace.SDFG(graph.label + '_' + map_entry.map.label)

        # Construct nested SDFG
        begin = nested_sdfg.add_state('begin')
        guard = nested_sdfg.add_state('guard')
        body = nested_sdfg.add_state('body')
        end = nested_sdfg.add_state('end')

        # begin -> guard: initialize the loop variable.
        nested_sdfg.add_edge(
            begin, guard,
            edges.InterstateEdge(assignments={str(loop_idx): str(loop_from)}))
        # guard -> body while the variable is <= loop_to (inclusive bound).
        nested_sdfg.add_edge(
            guard,
            body,
            edges.InterstateEdge(condition = str(loop_idx) + ' <= ' + \
                                             str(loop_to))
        )
        # guard -> end once the variable exceeds loop_to.
        nested_sdfg.add_edge(
            guard,
            end,
            edges.InterstateEdge(condition = str(loop_idx) + ' > ' + \
                                             str(loop_to))
        )
        # body -> guard: advance the loop variable by the step.
        nested_sdfg.add_edge(
            body,
            guard,
            edges.InterstateEdge(assignments = {str(loop_idx): str(loop_idx) + \
                                                ' + ' +str(loop_step)})
        )

        # Add map contents
        # Nodes and edges strictly inside the scope are added to the loop
        # body; edges touching the map entry/exit are handled further below.
        map_subgraph = graph.scope_subgraph(map_entry)
        for node in map_subgraph.nodes():
            if node is not map_entry and node not in map_exits:
                body.add_node(node)
        for src, src_conn, dst, dst_conn, memlet in map_subgraph.edges():
            if src is not map_entry and dst not in map_exits:
                body.add_edge(src, src_conn, dst, dst_conn, memlet)

        # Reconnect inputs
        # For every edge entering the map, create an '_in_<data>' container
        # and access node, and retarget the body memlets that referred to the
        # original container. The bookkeeping dicts are keyed by the
        # enumeration index of the incoming edge.
        nested_in_data_nodes = {}
        nested_in_connectors = {}
        nested_in_memlets = {}
        for i, edge in enumerate(graph.in_edges(map_entry)):
            src, src_conn, dst, dst_conn, memlet = edge
            data_label = '_in_' + memlet.data
            memdata = sdfg.arrays[memlet.data]
            if isinstance(memdata, data.Array):
                # NOTE(review): the dtype is passed before the shape, and the
                # container is registered on `sdfg` rather than on
                # `nested_sdfg` -- confirm both against the add_array
                # signature and the intended owner.
                data_array = sdfg.add_array(data_label, memdata.dtype, [
                    symbolic.overapproximate(r)
                    for r in memlet.bounding_box_size()
                ])
            elif isinstance(memdata, data.Scalar):
                data_array = sdfg.add_scalar(data_label, memdata.dtype)
            else:
                raise NotImplementedError()
            data_node = nodes.AccessNode(data_label)
            body.add_node(data_node)
            nested_in_data_nodes.update({i: data_node})
            nested_in_connectors.update({i: data_label})
            nested_in_memlets.update({i: memlet})
            for _, _, _, _, old_memlet in body.edges():
                if old_memlet.data == memlet.data:
                    old_memlet.data = data_label
            #body.add_edge(data_node, None, dst, dst_conn, memlet)

        # Reconnect outputs
        # Same as for the inputs, with '_out_<data>' containers.
        # NOTE(review): the index `i` restarts at 0 for every map exit, so
        # with more than one exit later entries overwrite earlier ones in
        # these dicts -- confirm a single exit is assumed.
        nested_out_data_nodes = {}
        nested_out_connectors = {}
        nested_out_memlets = {}
        for map_exit in map_exits:
            for i, edge in enumerate(graph.out_edges(map_exit)):
                src, src_conn, dst, dst_conn, memlet = edge
                data_label = '_out_' + memlet.data
                memdata = sdfg.arrays[memlet.data]
                if isinstance(memdata, data.Array):
                    data_array = sdfg.add_array(data_label, memdata.dtype, [
                        symbolic.overapproximate(r)
                        for r in memlet.bounding_box_size()
                    ])
                elif isinstance(memdata, data.Scalar):
                    data_array = sdfg.add_scalar(data_label, memdata.dtype)
                else:
                    raise NotImplementedError()
                data_node = nodes.AccessNode(data_label)
                body.add_node(data_node)
                nested_out_data_nodes.update({i: data_node})
                nested_out_connectors.update({i: data_label})
                nested_out_memlets.update({i: memlet})
                for _, _, _, _, old_memlet in body.edges():
                    if old_memlet.data == memlet.data:
                        old_memlet.data = data_label
                #body.add_edge(src, src_conn, data_node, None, memlet)

        # Add nested SDFG and reconnect it
        nested_node = graph.add_nested_sdfg(
            nested_sdfg, sdfg, set(nested_in_connectors.values()),
            set(nested_out_connectors.values()))

        # Outer edges that used to enter the map now enter the nested node.
        for i, edge in enumerate(graph.in_edges(map_entry)):
            src, src_conn, dst, dst_conn, memlet = edge
            graph.add_edge(src, src_conn, nested_node, nested_in_connectors[i],
                           nested_in_memlets[i])

        # Outer edges that used to leave the map now leave the nested node.
        for map_exit in map_exits:
            for i, edge in enumerate(graph.out_edges(map_exit)):
                src, src_conn, dst, dst_conn, memlet = edge
                graph.add_edge(nested_node, nested_out_connectors[i], dst,
                               dst_conn, nested_out_memlets[i])

        # Inside the body, feed former map-entry outputs from the new input
        # access nodes. The index is parsed from the connector name with its
        # first four characters dropped (presumably an "OUT_<n>" name with
        # 1-based n -- TODO confirm).
        for src, src_conn, dst, dst_conn, memlet in graph.out_edges(map_entry):
            i = int(src_conn[4:]) - 1
            new_memlet = dcpy(memlet)
            new_memlet.data = nested_in_data_nodes[i].data
            body.add_edge(nested_in_data_nodes[i], None, dst, dst_conn,
                          new_memlet)

        # Likewise route former map-exit inputs to the new output access
        # nodes; here the first three characters of the connector name are
        # dropped (presumably "IN_<n>" -- TODO confirm).
        for map_exit in map_exits:
            for src, src_conn, dst, dst_conn, memlet in graph.in_edges(
                    map_exit):
                i = int(dst_conn[3:]) - 1
                new_memlet = dcpy(memlet)
                new_memlet.data = nested_out_data_nodes[i].data
                body.add_edge(src, src_conn, nested_out_data_nodes[i], None,
                              new_memlet)

        # Finally remove the nodes of the original map scope from the state.
        for node in map_subgraph:
            graph.remove_node(node)
Ejemplo n.º 25
0
    def expansion(node, parent_state, parent_sdfg):
        """Expand ``node`` into an SDFG built around an FPGA pipeline scope.

        A new SDFG named ``<node.label>_outer`` with a single state is
        created. That state holds a ``Pipeline`` scope scheduled on
        ``FPGA_Device`` whose body is a nested SDFG (``<node.label>_inner``)
        consisting of three states executed in sequence:

        * *shift*: copies every buffer element ``i_shift + veclen`` to
          position ``i_shift``,
        * *update*: copies the newly read input value into the tail of the
          buffer,
        * *compute*: reads the buffered values into the compute tasklet and
          writes each output through a small register buffer.

        Outside the nested SDFG, every output is routed through a
        "conditional write" tasklet whose body is prefixed with a guard when
        an initialization phase or out-of-bounds condition exists.

        :param node: Library node to expand; ``label``, ``code`` and
                     ``boundary_conditions`` are read from it.
        :param parent_state: State containing ``node`` (passed on to
                             ``parse_connectors``).
        :param parent_sdfg: SDFG containing ``node``; its arrays and symbols
                            are looked up while building the expansion.
        :return: The newly constructed outer SDFG.
        :raises NotImplementedError: If an output field is accessed at an
                                     offset other than all zeros.
        """

        sdfg = dace.SDFG(node.label + "_outer")
        state = sdfg.add_state(node.label + "_outer")

        (inputs, outputs, shape, field_to_data, field_to_desc, field_to_edge,
         vector_lengths) = parse_connectors(node, parent_state, parent_sdfg)

        #######################################################################
        # Parse the tasklet code
        #######################################################################

        # Replace relative indices with memlet names
        converter = SubscriptConverter()

        # Add copy boundary conditions
        for field in node.boundary_conditions:
            if node.boundary_conditions[field]["btype"] == "copy":
                center_index = tuple(0 for _ in range(
                    len(parent_sdfg.arrays[field_to_data[field]].shape)))
                # This will register the renaming
                converter.convert(field, center_index)

        # Replace accesses in the code
        code, field_accesses = parse_accesses(node.code.as_string, outputs)

        iterator_mapping = make_iterator_mapping(node, field_accesses, shape)
        vector_length = validate_vector_lengths(vector_lengths,
                                                iterator_mapping)
        # Only the last dimension is divided by the vector length; all other
        # dimensions are kept as they are
        shape_vectorized = tuple(s / vector_length if i == len(shape) -
                                 1 else s for i, s in enumerate(shape))

        # Extract which fields to read from streams and what to buffer
        buffer_sizes = collections.OrderedDict()
        buffer_accesses = collections.OrderedDict()
        scalars = {}  # {name: type}
        for field_name in inputs:
            relative = field_accesses[field_name]
            dim_mask = iterator_mapping[field_name]
            if not any(dim_mask):
                # This is a scalar, no buffer needed. Instead, the SDFG must
                # take this as a symbol
                scalars[field_name] = parent_sdfg.symbols[field_name]
                sdfg.add_symbol(field_name, parent_sdfg.symbols[field_name])
                continue
            # One value per relative access, computed by dim_to_abs_val
            # (presumably a flattened offset -- confirm against the helper).
            # Fields with a "copy" boundary condition get an extra 0 entry.
            abs_indices = ([
                dim_to_abs_val(i, tuple(s for s, m in zip(shape, dim_mask)
                                        if m), parent_sdfg) for i in relative
            ] + ([0] if field_name in node.boundary_conditions
                 and node.boundary_conditions[field_name]["btype"] == "copy"
                 else []))
            max_access = max(abs_indices)
            min_access = min(abs_indices)
            buffer_size = max_access - min_access + vector_lengths[field_name]
            buffer_sizes[field_name] = buffer_size
            # (indices relative to center, buffer indices, center index)
            buffer_accesses[field_name] = ([tuple(r) for r in relative], [
                i - min_access for i in abs_indices
            ], -min_access)

        # Create a initialization phase corresponding to the highest distance
        # to the center
        init_sizes = [
            (buffer_sizes[key] - vector_lengths[key] - val[2]) // vector_length
            for key, val in buffer_accesses.items()
        ]
        init_size_max = int(np.max(init_sizes))

        parameters = [f"_i{i}" for i in range(len(shape))]

        # Dimensions we need to iterate over
        iterator_mask = np.array([s != 0 and s != 1 for s in shape],
                                 dtype=bool)
        iterators = make_iterators(
            tuple(s for s, m in zip(shape_vectorized, iterator_mask) if m),
            parameters=tuple(s for s, m in zip(parameters, iterator_mask)
                             if m))

        # Manually add pipeline entry and exit nodes
        pipeline_range = dace.properties.SubsetProperty.from_string(', '.join(
            iterators.values()))
        pipeline = dace.sdfg.nodes.Pipeline(
            "compute_" + node.label,
            list(iterators.keys()),
            pipeline_range,
            dace.dtypes.ScheduleType.FPGA_Device,
            False,
            init_size=init_size_max,
            init_overlap=False,
            drain_size=init_size_max,
            drain_overlap=True)
        entry = dace.sdfg.nodes.PipelineEntry(pipeline)
        exit = dace.sdfg.nodes.PipelineExit(pipeline)
        state.add_nodes_from([entry, exit])

        # Add nested SDFG to do 1) shift buffers 2) read from input 3) compute
        nested_sdfg = dace.SDFG(node.label + "_inner", parent=state)
        nested_sdfg_tasklet = state.add_nested_sdfg(
            nested_sdfg,
            sdfg,
            # Input connectors
            [k + "_in" for k in inputs if any(iterator_mapping[k])] +
            [name + "_buffer_in" for name, _ in buffer_sizes.items()],
            # Output connectors
            [k + "_out" for k in outputs] +
            [name + "_buffer_out" for name, _ in buffer_sizes.items()],
            schedule=dace.ScheduleType.FPGA_Device)
        # Propagate symbols
        for sym_name, sym_type in parent_sdfg.symbols.items():
            nested_sdfg.add_symbol(sym_name, sym_type)
            nested_sdfg_tasklet.symbol_mapping[sym_name] = sym_name
        # Map iterators
        for p in parameters:
            nested_sdfg.add_symbol(p, dace.int64)
            nested_sdfg_tasklet.symbol_mapping[p] = p

        # Shift state, which shifts all buffers by one
        shift_state = nested_sdfg.add_state(node.label + "_shift")

        # Update state, which reads new values from memory
        update_state = nested_sdfg.add_state(node.label + "_update")

        #######################################################################
        # Implement boundary conditions
        #######################################################################

        boundary_code, oob_cond = generate_boundary_conditions(
            node, shape, field_accesses, field_to_desc, iterator_mapping)

        #######################################################################
        # Only write if we're in bounds
        #######################################################################

        write_code = ("\n".join([
            "{}_inner_out = {}\n".format(
                output,
                field_accesses[output][tuple(0 for _ in range(len(shape)))])
            for output in outputs
        ]))
        # write_cond becomes an "if ...:" prefix that is prepended to the body
        # of each conditional-write tasklet created for the outputs below; it
        # is left empty when there is neither an initialization phase nor an
        # out-of-bounds condition
        if init_size_max > 0 or len(oob_cond) > 0:
            write_cond = []
            if init_size_max > 0:
                init_cond = pipeline.init_condition()
                write_cond.append("not " + init_cond)
                nested_sdfg_tasklet.symbol_mapping[init_cond] = init_cond
                nested_sdfg.add_symbol(init_cond, dace.bool)
            if len(oob_cond) > 0:
                oob_cond = " or ".join(sorted(oob_cond))
                oob_cond = f"not ({oob_cond})"
                write_cond.append(oob_cond)
            write_cond = " and ".join(write_cond)
            write_cond = f"if {write_cond}:\n\t"
        else:
            write_cond = ""

        # Compute tasklet body: boundary handling, then the parsed node code,
        # then the assignments to the "<output>_inner_out" connectors
        code = boundary_code + "\n" + code + "\n" + write_code

        #######################################################################
        # Create DaCe compute state
        #######################################################################

        # Compute state, which reads from input channels, performs the compute,
        # and writes to the output channel(s)
        compute_state = nested_sdfg.add_state(node.label + "_compute")
        compute_inputs = list(
            itertools.chain.from_iterable(
                [["_" + v for v in field_accesses[f].values()] for f in inputs
                 if any(iterator_mapping[f])]))
        compute_tasklet = compute_state.add_tasklet(
            node.label + "_compute",
            compute_inputs, {name + "_inner_out"
                             for name in outputs},
            code,
            language=dace.dtypes.Language.Python)
        # The unrolled map (and the names compute_unroll_entry/exit) only
        # exist when vectorizing; every later use is guarded by the same test
        if vector_length > 1:
            compute_unroll_entry, compute_unroll_exit = compute_state.add_map(
                compute_state.label + "_unroll",
                {"i_unroll": f"0:{vector_length}"},
                schedule=dace.ScheduleType.FPGA_Device,
                unroll=True)

        # Connect the three nested states
        nested_sdfg.add_edge(shift_state, update_state,
                             dace.sdfg.InterstateEdge())
        nested_sdfg.add_edge(update_state, compute_state,
                             dace.sdfg.InterstateEdge())

        # First, grab scalar variables
        for scalar, scalar_type in scalars.items():
            nested_sdfg.add_symbol(scalar, scalar_type)

        # Code to increment custom iterators
        iterator_code = ""

        for (field_name, size), init_size in zip(buffer_sizes.items(),
                                                 init_sizes):

            data_name = field_to_data[field_name]
            connector = field_to_edge[field_name].dst_conn
            data_name_outer = connector
            data_name_inner = field_name + "_in"
            desc_outer = parent_sdfg.arrays[data_name].clone()
            desc_outer.transient = False
            sdfg.add_datadesc(data_name_outer, desc_outer)

            mapping = iterator_mapping[field_name]
            is_array = not isinstance(desc_outer, dt.Stream)

            # If this array is part of the initialization phase, it needs its
            # own iterator, which we need to instantiate and increment in the
            # outer SDFG
            if is_array:
                if init_size == 0:
                    field_index = [s for s, p in zip(parameters, mapping) if p]
                else:
                    # Create custom iterators for this array
                    num_dims = sum(mapping, 0)
                    # NOTE(review): range(num_dims) only visits the first
                    # sum(mapping) indices, so a True entry of `mapping` at a
                    # higher index would be skipped -- confirm this is intended
                    field_iterators = [(f"_{field_name}_i{i}", shape[i])
                                       for i in range(num_dims) if mapping[i]]
                    start_index = init_size_max - init_size
                    tab = ""
                    if start_index > 0:
                        iterator_code += (
                            f"if {pipeline.iterator_str()} >= {start_index}:\n"
                        )
                        tab += "  "
                    # Emit nested if/else blocks, last iterator outermost: an
                    # iterator is incremented until it reaches its bound, and
                    # the next one is only touched in the wrap-around branch
                    for i, (it, s) in enumerate(reversed(field_iterators)):
                        iterator_code += f"""\
{tab}if {it} < {s} - 1:
{tab}  {it} = {it} + 1
{tab}else:
{tab}  {it} = 0\n"""
                        tab += "  "
                    field_index = [fi[0] for fi in field_iterators]
                    for fi in field_index:
                        pipeline.additional_iterators[fi] = "0"
                        nested_sdfg.add_symbol(fi, dace.int64)
                        nested_sdfg_tasklet.symbol_mapping[fi] = fi
                field_index = ", ".join(field_index)
            else:
                field_index = "0"

            # Begin reading according to this field's own buffer size, which is
            # translated to an index by subtracting it from the maximum buffer
            # size
            begin_reading = init_size_max - init_size
            total_size = functools.reduce(operator.mul, shape_vectorized, 1)
            end_reading = total_size + init_size_max - init_size

            # Outer memory read
            read_node_outer = state.add_read(data_name_outer)
            # When the read window differs from the full pipeline range, the
            # read goes through a guarded tasklet and a "<field>_wavefront"
            # scalar; otherwise the input is connected directly
            if begin_reading != 0 or end_reading != total_size + init_size_max:
                sdfg.add_scalar(f"{field_name}_wavefront",
                                desc_outer.dtype,
                                storage=dace.StorageType.FPGA_Local,
                                transient=True)
                wavefront_access = state.add_access(f"{field_name}_wavefront")
                condition = []
                it = pipeline.iterator_str()
                if begin_reading != 0:
                    condition.append(f"{it} >= {begin_reading}")
                if end_reading != total_size + init_size_max:
                    condition.append(f"{it} < {end_reading}")
                condition = " and ".join(condition)
                update_tasklet = state.add_tasklet(
                    f"read_{field_name}", {"wavefront_in"}, {"wavefront_out"},
                    f"if {condition}:\n"
                    "\twavefront_out = wavefront_in\n",
                    language=dace.dtypes.Language.Python)
                state.add_memlet_path(read_node_outer,
                                      entry,
                                      update_tasklet,
                                      dst_conn="wavefront_in",
                                      memlet=dace.Memlet(
                                          f"{data_name_outer}[{field_index}]",
                                          dynamic=True))
                state.add_memlet_path(update_tasklet,
                                      wavefront_access,
                                      src_conn="wavefront_out",
                                      memlet=dace.Memlet(
                                          f"{field_name}_wavefront",
                                          dynamic=True))
                state.add_memlet_path(
                    wavefront_access,
                    nested_sdfg_tasklet,
                    dst_conn=f"{field_name}_in",
                    memlet=dace.Memlet(f"{field_name}_wavefront"))
            else:
                state.add_memlet_path(
                    read_node_outer,
                    entry,
                    nested_sdfg_tasklet,
                    dst_conn=f"{field_name}_in",
                    memlet=dace.Memlet(f"{data_name_outer}[{field_index}]"))

            # Create inner memory access
            nested_sdfg.add_scalar(data_name_inner,
                                   desc_outer.dtype,
                                   storage=dace.StorageType.FPGA_Local,
                                   transient=False)

            buffer_name_outer = f"{node.label}_{field_name}_buffer"
            buffer_name_inner_read = f"{field_name}_buffer_in"
            buffer_name_inner_write = f"{field_name}_buffer_out"

            # Create buffer transient in outer SDFG
            # (desc_outer is rebound here to the buffer's descriptor)
            field_dtype = parent_sdfg.data(data_name).dtype
            _, desc_outer = sdfg.add_array(
                buffer_name_outer, (size, ),
                field_dtype.base_type,
                storage=dace.dtypes.StorageType.FPGA_Local,
                transient=True)

            # Create read and write nodes
            read_node_outer = state.add_read(buffer_name_outer)
            write_node_outer = state.add_write(buffer_name_outer)

            # Outer buffer read
            state.add_memlet_path(
                read_node_outer,
                entry,
                nested_sdfg_tasklet,
                dst_conn=buffer_name_inner_read,
                memlet=dace.Memlet(f"{buffer_name_outer}[0:{size}]"))

            # Outer buffer write
            state.add_memlet_path(nested_sdfg_tasklet,
                                  exit,
                                  write_node_outer,
                                  src_conn=buffer_name_inner_write,
                                  memlet=dace.Memlet(
                                      f"{write_node_outer.data}[0:{size}]",
                                      dynamic=True))

            # Inner copy
            desc_inner_read = desc_outer.clone()
            desc_inner_read.transient = False
            desc_inner_read.name = buffer_name_inner_read
            desc_inner_write = desc_inner_read.clone()
            desc_inner_write.name = buffer_name_inner_write
            nested_sdfg.add_datadesc(buffer_name_inner_read, desc_inner_read)
            nested_sdfg.add_datadesc(buffer_name_inner_write, desc_inner_write)

            # Make shift state if necessary
            if size > 1:
                shift_read = shift_state.add_read(buffer_name_inner_read)
                shift_write = shift_state.add_write(buffer_name_inner_write)
                shift_entry, shift_exit = shift_state.add_map(
                    f"shift_{field_name}",
                    {"i_shift": f"0:{size} - {vector_lengths[field_name]}"},
                    schedule=dace.dtypes.ScheduleType.FPGA_Device,
                    unroll=True)
                shift_tasklet = shift_state.add_tasklet(
                    f"shift_{field_name}", {f"{field_name}_shift_in"},
                    {f"{field_name}_shift_out"},
                    f"{field_name}_shift_out = {field_name}_shift_in")
                shift_state.add_memlet_path(
                    shift_read,
                    shift_entry,
                    shift_tasklet,
                    dst_conn=field_name + "_shift_in",
                    memlet=dace.Memlet(
                        f"{shift_read.data}"
                        f"[i_shift + {vector_lengths[field_name]}]"))
                shift_state.add_memlet_path(
                    shift_tasklet,
                    shift_exit,
                    shift_write,
                    src_conn=field_name + "_shift_out",
                    memlet=dace.Memlet(f"{shift_write.data}[i_shift]"))

            # Make update state
            update_read = update_state.add_read(data_name_inner)
            update_write = update_state.add_write(buffer_name_inner_write)
            # NOTE(review): this uses the global vector_length, whereas the
            # shift map above uses vector_lengths[field_name] -- confirm the
            # two always agree for buffered fields
            subset = f"{size} - {vector_length}:{size}" if size > 1 else "0"
            update_state.add_memlet_path(update_read,
                                         update_write,
                                         memlet=dace.Memlet(
                                             f"{update_read.data}",
                                             other_subset=f"{subset}"))

            # Make compute state
            compute_read = compute_state.add_read(buffer_name_inner_read)
            for relative, offset in zip(buffer_accesses[field_name][0],
                                        buffer_accesses[field_name][1]):
                memlet_name = field_accesses[field_name][tuple(relative)]
                if vector_length > 1:
                    if vector_lengths[field_name] > 1:
                        offset = f"{offset} + i_unroll"
                    else:
                        offset = str(offset)
                    path = [
                        compute_read, compute_unroll_entry, compute_tasklet
                    ]
                else:
                    offset = str(offset)
                    path = [compute_read, compute_tasklet]
                compute_state.add_memlet_path(
                    *path,
                    dst_conn="_" + memlet_name,
                    memlet=dace.Memlet(f"{compute_read.data}[{offset}]"))

        # Tasklet to update iterators
        if iterator_code:
            update_iterator_tasklet = state.add_tasklet(
                f"{node.label}_update_iterators", {}, {}, iterator_code)
            # Empty memlets place the tasklet between the nested SDFG and the
            # pipeline exit without moving any data
            state.add_memlet_path(nested_sdfg_tasklet,
                                  update_iterator_tasklet,
                                  memlet=dace.Memlet())
            state.add_memlet_path(update_iterator_tasklet,
                                  exit,
                                  memlet=dace.Memlet())

        for field_name in outputs:

            # Outputs may only be written at the center (all-zero offset)
            for offset in field_accesses[field_name]:
                if offset is not None and list(offset) != [0] * len(offset):
                    raise NotImplementedError("Output offsets not implemented")

            data_name = field_to_data[field_name]

            # Outer write
            data_name_outer = field_name
            data_name_inner = field_name + "_out"
            desc_outer = parent_sdfg.arrays[data_name].clone()
            desc_outer.transient = False
            array_index = ", ".join(map(str, parameters))
            try:
                sdfg.add_datadesc(data_name_outer, desc_outer)
            except NameError:  # Already an input
                pass

            # Create inner access
            nested_sdfg.add_scalar(data_name_inner,
                                   desc_outer.dtype,
                                   storage=dace.StorageType.FPGA_Local,
                                   transient=False)

            # Inner write
            write_node_inner = compute_state.add_write(data_name_inner)

            # Intermediate buffer, mostly relevant for vectorization
            output_buffer_name = field_name + "_output_buffer"
            nested_sdfg.add_array(output_buffer_name, (vector_length, ),
                                  desc_outer.dtype.base_type,
                                  storage=dace.StorageType.FPGA_Registers,
                                  transient=True)
            output_buffer = compute_state.add_access(output_buffer_name)

            # If vectorized, we need to pass through the unrolled scope
            if vector_length > 1:
                compute_state.add_memlet_path(
                    compute_tasklet,
                    compute_unroll_exit,
                    output_buffer,
                    src_conn=field_name + "_inner_out",
                    memlet=dace.Memlet(f"{output_buffer_name}[i_unroll]"))
            else:
                # NOTE(review): the trailing comma after this call wraps its
                # result in a discarded 1-tuple; it has no effect
                compute_state.add_memlet_path(
                    compute_tasklet,
                    output_buffer,
                    src_conn=field_name + "_inner_out",
                    memlet=dace.Memlet(f"{output_buffer_name}[0]")),

            # Final memlet to the output
            # NOTE(review): stray trailing comma here as well (no effect)
            compute_state.add_memlet_path(
                output_buffer,
                write_node_inner,
                memlet=dace.Memlet(f"{write_node_inner.data}")),

            # Conditional write tasklet
            sdfg.add_scalar(f"{field_name}_result",
                            desc_outer.dtype,
                            storage=dace.StorageType.FPGA_Local,
                            transient=True)
            output_access = state.add_access(f"{field_name}_result")
            state.add_memlet_path(nested_sdfg_tasklet,
                                  output_access,
                                  src_conn=data_name_inner,
                                  memlet=dace.Memlet(f"{field_name}_result"))
            output_tasklet = state.add_tasklet(
                f"{field_name}_conditional_write", {f"_{field_name}_result"},
                {f"_{data_name_inner}"},
                (write_cond + f"_{data_name_inner} = _{field_name}_result"))
            state.add_memlet_path(output_access,
                                  output_tasklet,
                                  dst_conn=f"_{field_name}_result",
                                  memlet=dace.Memlet(f"{field_name}_result"))
            write_node_outer = state.add_write(data_name_outer)
            # Streams are always addressed at element 0; arrays are indexed
            # with the pipeline parameters
            if isinstance(desc_outer, dt.Stream):
                subset = "0"
            else:
                subset = array_index
            # NOTE(review): stray trailing comma after this call (no effect)
            state.add_memlet_path(output_tasklet,
                                  exit,
                                  write_node_outer,
                                  src_conn=f"_{data_name_inner}",
                                  memlet=dace.Memlet(
                                      f"{write_node_outer.data}[{subset}]",
                                      dynamic=True)),

        return sdfg
Ejemplo n.º 26
0
def make_sdfg(name="transpose"):
    """Build an SDFG that transposes an ``N x M`` matrix on an FPGA device.

    The SDFG has three states chained in order:

    * ``<name>_pre``: copies host array ``a_input`` to ``a_input_device``,
    * ``<name>``: an ``FPGA_Device`` map over ``i in 0:N, j in 0:M`` that
      copies ``a_input_device[i, j]`` to ``a_output_device[j, i]``,
    * ``<name>_post``: copies ``a_output_device`` back to host array
      ``a_output``.

    :param name: Name of the SDFG; also used as prefix for the state names
                 and as the label of the map and tasklet.
    :return: The constructed SDFG.
    """

    n = dace.symbol("N")
    m = dace.symbol("M")

    sdfg = dace.SDFG(name)

    pre_state = sdfg.add_state(name + "_pre")
    state = sdfg.add_state(name)
    post_state = sdfg.add_state(name + "_post")
    sdfg.add_edge(pre_state, state, dace.InterstateEdge())
    sdfg.add_edge(state, post_state, dace.InterstateEdge())

    # Input is N x M, output is the transposed M x N
    _, desc_input_host = sdfg.add_array("a_input", (n, m), dace.float64)
    _, desc_output_host = sdfg.add_array("a_output", (m, n), dace.float64)

    # Device-side transients mirror the host descriptors but live in FPGA
    # global memory, with input and output placed on different banks
    desc_input_device = copy.copy(desc_input_host)
    desc_input_device.storage = dace.StorageType.FPGA_Global
    desc_input_device.location["bank"] = 0
    desc_input_device.transient = True
    desc_output_device = copy.copy(desc_output_host)
    desc_output_device.storage = dace.StorageType.FPGA_Global
    desc_output_device.location["bank"] = 1
    desc_output_device.transient = True
    sdfg.add_datadesc("a_input_device", desc_input_device)
    sdfg.add_datadesc("a_output_device", desc_output_device)

    # Host to device
    pre_read = pre_state.add_read("a_input")
    pre_write = pre_state.add_write("a_input_device")
    pre_state.add_memlet_path(pre_read,
                              pre_write,
                              memlet=dace.Memlet.simple(pre_write, "0:N, 0:M"))

    # Device to host. The output arrays have shape (M, N), so the copied
    # range must be "0:M, 0:N"; "0:N, 0:M" addresses the wrong region whenever
    # N != M.
    post_read = post_state.add_read("a_output_device")
    post_write = post_state.add_write("a_output")
    post_state.add_memlet_path(post_read,
                               post_write,
                               memlet=dace.Memlet.simple(
                                   post_write, "0:M, 0:N"))

    # Compute state
    read = state.add_read("a_input_device")
    write = state.add_write("a_output_device")

    # Trivial tasklet
    tasklet = state.add_tasklet(name, {"_in"}, {"_out"}, "_out = _in")

    entry, exit = state.add_map(name, {
        "i": "0:N",
        "j": "0:M",
    },
                                schedule=dace.ScheduleType.FPGA_Device)

    # Read element (i, j) and write it to the swapped position (j, i)
    state.add_memlet_path(read,
                          entry,
                          tasklet,
                          dst_conn="_in",
                          memlet=dace.Memlet.simple("a_input_device",
                                                    "i, j",
                                                    num_accesses=1))
    state.add_memlet_path(tasklet,
                          exit,
                          write,
                          src_conn="_out",
                          memlet=dace.Memlet.simple("a_output_device",
                                                    "j, i",
                                                    num_accesses=1))

    return sdfg
Ejemplo n.º 27
0
# Copyright 2019-2022 ETH Zurich and the DaCe authors. All rights reserved.
""" SDFG API sample that showcases nested SDFG creation. """
import dace
import numpy as np

# Create the outer (top-level) SDFG, named 'nested_main'
sdfg = dace.SDFG('nested_main')

# Register array 'A' (shape [2], element type float32) on the outer SDFG
sdfg.add_array('A', [2], dace.float32)


# Sample state contents
def mystate(state, src, dst):
    """Fill ``state`` with a single tasklet computing ``dst[0] = src[0] + 1``.

    :param state: State that receives the access nodes, tasklet and edges.
    :param src: Name of the data container read at element ``0``.
    :param dst: Name of the data container written at element ``0``.
    """
    reader = state.add_read(src)
    writer = state.add_write(dst)
    increment = state.add_tasklet('aaa2', {'a'}, {'b'}, 'b = a + 1')

    # src -> tasklet connector 'a'
    incoming = dace.Memlet(data=src, subset='0')
    state.add_memlet_path(reader, increment, dst_conn='a', memlet=incoming)

    # tasklet connector 'b' -> dst
    outgoing = dace.Memlet(data=dst, subset='0')
    state.add_memlet_path(increment, writer, src_conn='b', memlet=outgoing)


# Create nested SDFG
def make_sdfg(squeeze, name):
    """Build an SDFG with overapproximated memlets leaving a nested SDFG.

    The outer state holds a map over ``j`` in ``1:M`` around a nested SDFG.
    The nested SDFG loops ``i`` from ``0`` while ``i < N - 2`` and writes
    ``i + 99`` and ``i + 101`` to its outputs. The outer memlets cover more
    than what is actually written; memlet propagation is run on the SDFG
    before it is returned.

    :param squeeze: If true, the nested SDFG exposes two one-dimensional
                    arrays ``a1`` and ``a2`` (stride ``M``); otherwise it
                    exposes a single two-dimensional array ``a``.
    :param name: Suffix appended to ``memlet_propagation_`` to form the
                 name of the outer SDFG.
    :return: The outer SDFG after ``propagation.propagate_memlets_sdfg``.
    """
    N = dace.symbol('N')
    M = dace.symbol('M')

    # Everything that differs between the two variants is selected up front
    if squeeze:
        first_name, second_name = 'a1', 'a2'
        first_inner, second_inner = 'a1[i]', 'a2[i]'
        outer_writes = [
            # This is expected to propagate to A[0:N - 2, j].
            ('a1', 'A[0:N+1, j]'),
            # This is expected to propagate to A[2:N, j - 1].
            ('a2', 'A[2:N+1, j-1]'),
        ]
    else:
        first_name = second_name = 'a'
        first_inner, second_inner = 'a[i, 1]', 'a[i+2, 0]'
        outer_writes = [
            # This memlet is expected to propagate to A[0:N, j - 1:j + 1].
            ('a', 'A[0:N+1, j-1:j+1]'),
        ]

    # Outer SDFG: symbols, the global array, and a map over j
    outer = dace.SDFG('memlet_propagation_%s' % name)
    for symbol_name in ('N', 'M'):
        outer.add_symbol(symbol_name, dace.int64)
    outer.add_array('A', [N + 1, M], dace.int64)
    outer_state = outer.add_state()
    map_entry, map_exit = outer_state.add_map('map', {'j': '1:M'})
    sink = outer_state.add_write('A')

    # Nested SDFG and its data containers
    inner = dace.SDFG('nested')
    if squeeze:
        inner.add_array('a1', [N + 1], dace.int64, strides=[M])
        inner.add_array('a2', [N - 1], dace.int64, strides=[M])
    else:
        inner.add_array('a', [N + 1, M], dace.int64)

    # Loop body: two tasklets, each writing one element per iteration
    body = inner.add_state()
    first_sink = body.add_write(first_name)
    second_sink = body.add_write(second_name)
    add99 = body.add_tasklet('add99', {}, {'out'}, 'out = i + 99')
    add101 = body.add_tasklet('add101', {}, {'out'}, 'out = i + 101')
    body.add_edge(add99, 'out', first_sink, None, dace.Memlet(first_inner))
    body.add_edge(add101, 'out', second_sink, None,
                  dace.Memlet(second_inner))
    inner.add_loop(None, body, None, 'i', '0', 'i < N - 2', 'i + 1')

    # Connect nested SDFG to toplevel one
    nested_node = outer_state.add_nested_sdfg(
        inner,
        None, {}, {first_name, second_name},
        symbol_mapping={sym: sym for sym in ('j', 'N', 'M')})
    outer_state.add_nedge(map_entry, nested_node, dace.Memlet())

    # Add outer memlets that are overapproximated
    for connector, outer_subset in outer_writes:
        outer_state.add_memlet_path(nested_node,
                                    map_exit,
                                    sink,
                                    src_conn=connector,
                                    memlet=dace.Memlet(outer_subset))

    propagation.propagate_memlets_sdfg(outer)

    return outer
Ejemplo n.º 29
0
def make_sdfg(name="fpga_stcl_test", dtype=dace.float32, veclen=8):
    """Build an SDFG for a vectorized 4-point stencil in an FPGA pipeline.

    The SDFG has three states: a pre-state copying host array ``a`` to
    ``a_device``, a compute state, and a post-state copying ``b_device``
    back to host array ``b``. In the compute state the input is streamed
    through ``a_stream`` into a shift register of ``2*M + veclen``
    elements; a tasklet averages the north/west/east/south neighbors
    (substituting ``1`` outside the domain) and the packed result is
    streamed out through ``b_stream``.

    :param name: Name of the SDFG; also used as prefix for state, pipeline,
                 map and tasklet names.
    :param dtype: Scalar element type of the stencil data.
    :param veclen: Vector width; arrays are declared with shape
                   ``(N, M / veclen)`` of ``dace.vector(dtype, veclen)``.
    :return: The constructed SDFG.
    """

    vtype = dace.vector(dtype, veclen)

    n = dace.symbol("N")
    m = dace.symbol("M")

    sdfg = dace.SDFG(name)

    pre_state = sdfg.add_state(name + "_pre")
    state = sdfg.add_state(name)
    post_state = sdfg.add_state(name + "_post")
    sdfg.add_edge(pre_state, state, dace.InterstateEdge())
    sdfg.add_edge(state, post_state, dace.InterstateEdge())

    # Host arrays, plus transient device-side copies placed in separate
    # FPGA global memory banks
    _, desc_input_host = sdfg.add_array("a", (n, m / veclen), vtype)
    _, desc_output_host = sdfg.add_array("b", (n, m / veclen), vtype)
    desc_input_device = copy.copy(desc_input_host)
    desc_input_device.storage = dace.StorageType.FPGA_Global
    desc_input_device.location["bank"] = 0
    desc_input_device.transient = True
    desc_output_device = copy.copy(desc_output_host)
    desc_output_device.storage = dace.StorageType.FPGA_Global
    desc_output_device.location["bank"] = 1
    desc_output_device.transient = True
    sdfg.add_datadesc("a_device", desc_input_device)
    sdfg.add_datadesc("b_device", desc_output_device)

    # Host to device
    pre_read = pre_state.add_read("a")
    pre_write = pre_state.add_write("a_device")
    pre_state.add_memlet_path(
        pre_read, pre_write, memlet=dace.Memlet(f"a_device[0:N, 0:M/{veclen}]"))

    # Device to host
    post_read = post_state.add_read("b_device")
    post_write = post_state.add_write("b")
    post_state.add_memlet_path(
        post_read,
        post_write,
        memlet=dace.Memlet(f"b_device[0:N, 0:M/{veclen}]"))

    # Compute state
    read_memory = state.add_read("a_device")
    write_memory = state.add_write("b_device")

    # Memory streams
    sdfg.add_stream("a_stream",
                    vtype,
                    storage=dace.StorageType.FPGA_Local,
                    transient=True)
    sdfg.add_stream("b_stream",
                    vtype,
                    storage=dace.StorageType.FPGA_Local,
                    transient=True)
    produce_input_stream = state.add_write("a_stream")
    consume_input_stream = state.add_read("a_stream")
    produce_output_stream = state.add_write("b_stream")
    # This node is only read from (it feeds write_memory), so it is a read
    # access, mirroring consume_input_stream
    consume_output_stream = state.add_read("b_stream")

    # Stencil computation; neighbors outside the domain are replaced by 1
    tasklet = state.add_tasklet(
        name, {"_north", "_west", "_east", "_south"}, {"result"}, """\
north = _north if i >= 1 else 1
west = _west if {W}*j + u >= 1 else 1
east = _east if {W}*j + u < M - 1 else 1
south = _south if i < N - 1 else 1

result = 0.25 * (north + west + east + south)""".format(W=veclen))

    entry, exit = state.add_pipeline(name, {
        "i": "0:N",
        "j": "0:M/{}".format(veclen),
    },
                                     schedule=dace.ScheduleType.FPGA_Device,
                                     init_size=m / veclen,
                                     init_overlap=False,
                                     drain_size=m / veclen,
                                     drain_overlap=True)

    # Unrolled map
    unroll_entry, unroll_exit = state.add_map(
        name + "_unroll", {"u": "0:{}".format(veclen)},
        schedule=dace.ScheduleType.FPGA_Device,
        unroll=True)

    # Container-to-container copies between arrays and streams
    state.add_memlet_path(read_memory,
                          produce_input_stream,
                          memlet=dace.Memlet(
                              f"{read_memory.data}[0:N, 0:M/{veclen}]",
                              other_subset="0"))
    # The memlet is given as a single expression string, like the
    # array-to-stream copy above
    state.add_memlet_path(consume_output_stream,
                          write_memory,
                          memlet=dace.Memlet(
                              f"{write_memory.data}[0:N, 0:M/{veclen}]",
                              other_subset="0"))

    # Container-to-container copy from vectorized stream to non-vectorized
    # buffer
    sdfg.add_array("input_buffer", (1, ),
                   vtype,
                   storage=dace.StorageType.FPGA_Local,
                   transient=True)
    sdfg.add_array("shift_register", (2 * m + veclen, ),
                   dtype,
                   storage=dace.StorageType.FPGA_ShiftRegister,
                   transient=True)
    sdfg.add_array("output_buffer", (veclen, ),
                   dtype,
                   storage=dace.StorageType.FPGA_Local,
                   transient=True)
    sdfg.add_array("output_buffer_packed", (1, ),
                   vtype,
                   storage=dace.StorageType.FPGA_Local,
                   transient=True)
    input_buffer = state.add_access("input_buffer")
    shift_register = state.add_access("shift_register")
    output_buffer = state.add_access("output_buffer")
    output_buffer_packed = state.add_access("output_buffer_packed")

    # Only read from the input stream if the pipeline is not draining
    read_tasklet = state.add_tasklet(
        name + "_conditional_read", {"_in"}, {"_out"},
        "if not {}:\n\t_out = _in".format(entry.pipeline.drain_condition()))

    # Input stream to buffer
    state.add_memlet_path(consume_input_stream,
                          entry,
                          read_tasklet,
                          dst_conn="_in",
                          memlet=dace.Memlet(f"{consume_input_stream.data}[0]",
                                             dynamic=True))
    state.add_memlet_path(read_tasklet,
                          input_buffer,
                          src_conn="_out",
                          memlet=dace.Memlet(f"{input_buffer.data}[0]"))
    # The new vector is written to the last veclen entries of the shift
    # register
    state.add_memlet_path(input_buffer,
                          shift_register,
                          memlet=dace.Memlet(f"{input_buffer.data}[0]",
                                             other_subset=f"2*M:(2*M + {veclen})"))

    # Stencils accesses
    state.add_memlet_path(
        shift_register,
        unroll_entry,
        tasklet,
        dst_conn="_north",
        memlet=dace.Memlet(f"{shift_register.data}[u]"))  # North
    state.add_memlet_path(
        shift_register,
        unroll_entry,
        tasklet,
        dst_conn="_west",
        memlet=dace.Memlet(f"{shift_register.data}[u + M - 1]"))  # West
    state.add_memlet_path(
        shift_register,
        unroll_entry,
        tasklet,
        dst_conn="_east",
        memlet=dace.Memlet(f"{shift_register.data}[u + M + 1]"))  # East
    state.add_memlet_path(
        shift_register,
        unroll_entry,
        tasklet,
        dst_conn="_south",
        memlet=dace.Memlet(f"{shift_register.data}[u + 2 * M]"))  # South

    # Tasklet to buffer
    state.add_memlet_path(tasklet,
                          unroll_exit,
                          output_buffer,
                          src_conn="result",
                          memlet=dace.Memlet(f"{output_buffer.data}[u]"))

    # Pack buffer
    state.add_memlet_path(output_buffer,
                          output_buffer_packed,
                          memlet=dace.Memlet(f"{output_buffer_packed.data}[0]",
                                             other_subset=f"0:{veclen}"))

    # Only write if not initializing
    write_tasklet = state.add_tasklet(
        name + "_conditional_write", {"_in"}, {"_out"},
        "if not {}:\n\t_out = _in".format(entry.pipeline.init_condition()))

    # Packed buffer to conditional write tasklet
    state.add_memlet_path(output_buffer_packed,
                          write_tasklet,
                          dst_conn="_in",
                          memlet=dace.Memlet(f"{output_buffer_packed.data}[0]"))

    # Conditional write tasklet to output stream
    state.add_memlet_path(write_tasklet,
                          exit,
                          produce_output_stream,
                          src_conn="_out",
                          memlet=dace.Memlet(f"{produce_output_stream.data}[0]",
                                             dynamic=True))

    return sdfg
Ejemplo n.º 30
0
def test_tasklet_array():
    """
        Run a SystemVerilog tasklet that adds 42 to its input over an
        int32 array and compare the output against NumPy.
    """

    # Symbolic array length, bound to a concrete value for this test
    N = dace.symbol('N')
    N.set(128)
    length = N.get()

    # SystemVerilog source of the tasklet: accepts a value on the `a`
    # stream and emits that value plus 42 on the `b` stream
    rtl_code = '''
        always@(posedge ap_aclk) begin
            if (ap_areset) begin
                s_axis_a_tready <= 1;
                m_axis_b_tvalid <= 0;
                m_axis_b_tdata <= 0;
            end else if (s_axis_a_tvalid && s_axis_a_tready) begin
                s_axis_a_tready <= 0;
                m_axis_b_tvalid <= 1;
                m_axis_b_tdata <= s_axis_a_tdata + 42;
            end else if (m_axis_b_tvalid && m_axis_b_tready) begin
                s_axis_a_tready <= 1;
                m_axis_b_tvalid <= 0;
                m_axis_b_tdata <= 0;
            end
        end
        '''

    # SDFG with a single state and the input/output containers
    sdfg = dace.SDFG('rtl_tasklet_array')
    state = sdfg.add_state()
    for array_name in ('A', 'B'):
        sdfg.add_array(array_name, [N], dtype=dace.int32)

    # RTL tasklet with one input and one output connector
    rtl_tasklet = state.add_tasklet(name='rtl_tasklet',
                                    inputs={'a'},
                                    outputs={'b'},
                                    code=rtl_code,
                                    language=dace.Language.SystemVerilog)

    # Access nodes, wired to the tasklet connectors
    src_node = state.add_read('A')
    dst_node = state.add_write('B')
    state.add_edge(src_node, None, rtl_tasklet, 'a', dace.Memlet('A[0:N]'))
    state.add_edge(rtl_tasklet, 'b', dst_node, None, dace.Memlet('B[0:N]'))

    # Fix N in the SDFG and validate it
    sdfg.specialize({'N': length})
    sdfg.validate()

    # Random input and zero-initialized output
    a_host = np.random.randint(0, 100, length).astype(np.int32)
    b_host = np.zeros((length, ), dtype=np.int32)

    # Execute the program
    sdfg(A=a_host, B=b_host)

    # Every output element must equal the matching input plus 42
    assert np.array_equal(b_host, a_host + 42)