Example #1
0
def make_read_col():
    """
    Build the ``spmv_read_col`` SDFG.

    Inside the loop body produced by ``make_iteration_space`` a tasklet reads
    from the ``A_col_mem`` array and pushes the selected entry into the
    ``col_pipe`` stream.

    :return: The constructed SDFG.
    """
    sdfg = SDFG("spmv_read_col")

    # Only the loop body is populated; the pre/post states are not touched.
    _, loop_body, _ = make_iteration_space(sdfg)

    col_mem = loop_body.add_array("A_col_mem", (nnz, ),
                                  itype,
                                  storage=StorageType.FPGA_Global)
    col_stream = loop_body.add_stream("col_pipe",
                                      itype,
                                      storage=StorageType.FPGA_Local)

    reader = loop_body.add_tasklet(
        "read_col",
        {"col_in"},
        {"col_out"},
        "col_out = col_in[row_begin + c]",
    )

    # The tasklet receives the full array and indexes it itself.
    whole_array = Memlet.simple(col_mem, "0:nnz")
    loop_body.add_memlet_path(col_mem,
                              reader,
                              dst_conn="col_in",
                              memlet=whole_array)

    stream_push = Memlet.simple(col_stream, "0")
    loop_body.add_memlet_path(reader,
                              col_stream,
                              src_conn="col_out",
                              memlet=stream_push)

    return sdfg
Example #2
0
def make_sdfg(dtype):
    """
    Build the ``mpi_reduce`` SDFG: a single state in which an MPI ``Reduce``
    library node reads ``inbuf`` and ``root`` and writes ``outbuf``.

    :param dtype: Element type of ``inbuf`` and ``outbuf``.
    :return: The constructed SDFG.
    """
    n = dace.symbol("n")

    sdfg = dace.SDFG("mpi_reduce")
    state = sdfg.add_state("dataflow")

    # All three containers are non-transient, i.e. supplied by the caller.
    array_specs = (
        ("inbuf", [n], dtype),
        ("outbuf", [n], dtype),
        ("root", [1], dace.dtypes.int32),
    )
    for array_name, shape, elem_type in array_specs:
        sdfg.add_array(array_name, shape, elem_type, transient=False)

    src_node = state.add_access("inbuf")
    dst_node = state.add_access("outbuf")
    root_node = state.add_access("root")
    reduce_op = mpi.nodes.reduce.Reduce("reduce")

    # Inputs of the library node
    state.add_memlet_path(src_node,
                          reduce_op,
                          dst_conn="_inbuffer",
                          memlet=Memlet.simple(src_node,
                                               "0:n",
                                               num_accesses=n))
    state.add_memlet_path(root_node,
                          reduce_op,
                          dst_conn="_root",
                          memlet=Memlet.simple(root_node,
                                               "0:1",
                                               num_accesses=1))

    # Output of the library node
    state.add_memlet_path(reduce_op,
                          dst_node,
                          src_conn="_outbuffer",
                          memlet=Memlet.simple(dst_node,
                                               "0:n",
                                               num_accesses=n))

    return sdfg
Example #3
0
def make_nested_vecAdd_sdfg(sdfg_name: str, dtype=dace.float32):
    '''
    Builds an SDFG for vector addition whose computation is delegated to a
    nested SDFG created by ``make_vecAdd_sdfg``.
    :param sdfg_name: name of the outer SDFG; the nested SDFG is named
                      ``sdfg_name + "_nested"``
    :param dtype: element data type of the arrays ``x``, ``y`` and ``z``
    :return: the outer SDFG
    '''
    size = dace.symbol("size")
    parent_sdfg = dace.SDFG(sdfg_name)
    parent_state = parent_sdfg.add_state("vecAdd_parent")

    # Data containers: x and y are read, z is written.
    for array_name in ("x", "y", "z"):
        parent_sdfg.add_array(array_name, [size], dtype=dtype)

    x_read = parent_state.add_read("x")
    y_read = parent_state.add_read("y")
    z_write = parent_state.add_write("z")

    # Build the inner SDFG and embed it into the parent state.
    inner_sdfg = make_vecAdd_sdfg(sdfg_name + "_nested", dtype)
    nested_node = parent_state.add_nested_sdfg(inner_sdfg, parent_sdfg,
                                               {"x", "y"}, {"z"})

    # Each input is forwarded in full to the connector of the same name.
    for reader, connector in ((x_read, "x"), (y_read, "y")):
        parent_state.add_memlet_path(reader,
                                     nested_node,
                                     dst_conn=connector,
                                     memlet=Memlet.simple(reader,
                                                          "0:size",
                                                          num_accesses=size))

    parent_state.add_memlet_path(nested_node,
                                 z_write,
                                 src_conn="z",
                                 memlet=Memlet.simple(z_write,
                                                      "0:size",
                                                      num_accesses=size))

    return parent_sdfg
Example #4
0
def _wait(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, request: str):
    """
    Add an MPI ``Wait`` library node to ``state``.

    The request container feeds the node's ``_request`` connector. The
    ``_stat_source`` and ``_stat_tag`` outputs are written to two fresh
    single-element ``int32`` transients.

    :param pv: Program visitor (not used by this function).
    :param sdfg: SDFG holding the data descriptors.
    :param state: State in which nodes and edges are created.
    :param request: Name of the request container, or a ``(name, range)``
                    tuple restricting the memlet to ``range``.
    :return: None
    """
    from dace.libraries.mpi.nodes.wait import Wait

    wait_node = Wait('_Wait_')

    # Accept either a plain name or a (name, range) pair.
    if isinstance(request, tuple):
        req_name, req_range = request
    else:
        req_name, req_range = request, None

    req_desc = sdfg.arrays[req_name]
    req_access = state.add_access(req_name)

    # add_temp_transient returns (name, descriptor); keep both for from_array.
    source_info = sdfg.add_temp_transient([1], dtypes.int32)
    source_access = state.add_write(source_info[0])
    tag_info = sdfg.add_temp_transient([1], dtypes.int32)
    tag_access = state.add_write(tag_info[0])

    req_memlet = (Memlet.simple(req_name, req_range)
                  if req_range else Memlet.from_array(req_name, req_desc))

    state.add_edge(req_access, None, wait_node, '_request', req_memlet)
    state.add_edge(wait_node, '_stat_source', source_access, None,
                   Memlet.from_array(*source_info))
    state.add_edge(wait_node, '_stat_tag', tag_access, None,
                   Memlet.from_array(*tag_info))

    return None
Example #5
0
def make_sdfg(dtype):
    """
    Build the ``mpi_bcast`` SDFG: a single state in which an MPI ``Bcast``
    library node reads ``x`` and ``root`` and writes back to ``x``.

    :param dtype: Element type of the array ``x``.
    :return: The constructed SDFG.
    """
    n = dace.symbol("n")

    sdfg = dace.SDFG("mpi_bcast")
    state = sdfg.add_state("dataflow")

    sdfg.add_array("x", [n], dtype, transient=False)
    sdfg.add_array("root", [1], dace.dtypes.int32, transient=False)

    # Two access nodes for the same container: one read, one written.
    x = state.add_access("x")
    xout = state.add_access("x")
    root = state.add_access("root")
    bcast_node = mpi.nodes.bcast.Bcast("bcast")

    state.add_memlet_path(x,
                          bcast_node,
                          dst_conn="_inbuffer",
                          memlet=Memlet.simple(x, "0:n", num_accesses=n))
    state.add_memlet_path(root,
                          bcast_node,
                          dst_conn="_root",
                          memlet=Memlet.simple(root, "0:1", num_accesses=1))
    # The output memlet spans all n elements, so its access count is n,
    # matching the input memlet above.
    state.add_memlet_path(bcast_node,
                          xout,
                          src_conn="_outbuffer",
                          memlet=Memlet.simple(xout, "0:n", num_accesses=n))

    return sdfg
Example #6
0
def _Reduce(pv: 'ProgramVisitor',
            sdfg: SDFG,
            state: SDFGState,
            buffer: str,
            op: str,
            root: Union[str, sp.Expr, Number] = 0,
            grid: str = None):
    """
    Add an MPI ``Reduce`` library node to ``state`` that reads from and
    writes back to the same data container.

    :param pv: Program visitor, forwarded to ``_define_local_scalar``.
    :param sdfg: SDFG holding the data descriptors.
    :param state: State in which nodes and edges are created.
    :param buffer: Name of the container used as both input and output.
    :param op: Passed through to the ``Reduce`` library node.
    :param root: Name of an existing container holding the root, or a value
                 that a small tasklet writes into a fresh local scalar.
    :param grid: Passed through to the ``Reduce`` library node.
    :return: None
    """
    from dace.libraries.mpi.nodes.reduce import Reduce

    reduce_node = Reduce('_Reduce_', op, grid)
    buffer_desc = sdfg.arrays[buffer]
    read_node = state.add_read(buffer)
    write_node = state.add_write(buffer)

    root_is_container = isinstance(root, str) and root in sdfg.arrays.keys()
    if root_is_container:
        root_node = state.add_read(root)
    else:
        # Materialize the root value in a scalar that shares the buffer's
        # storage type, initialized by a dedicated tasklet.
        scalar_name = _define_local_scalar(pv, sdfg, state, dace.int32,
                                           buffer_desc.storage)
        root_node = state.add_access(scalar_name)
        init_tasklet = state.add_tasklet('_set_root_', {}, {'__out'},
                                         '__out = {}'.format(root))
        state.add_edge(init_tasklet, '__out', root_node, None,
                       Memlet.simple(scalar_name, '0'))

    # Each edge gets its own memlet object.
    state.add_edge(read_node, None, reduce_node, '_inbuffer',
                   Memlet.from_array(buffer, buffer_desc))
    state.add_edge(root_node, None, reduce_node, '_root',
                   Memlet.simple(root_node.data, '0'))
    state.add_edge(reduce_node, '_outbuffer', write_node, None,
                   Memlet.from_array(buffer, buffer_desc))

    return None
Example #7
0
def test_dynamic_sdfg_with_math_functions():
    """
    Build an SDFG that applies ``math.exp`` element-wise through a mapped
    tasklet, run it on random data and compare the result with ``np.exp``.
    """
    # Symbolic size, fixed to 20 for this run
    N = dp.symbol('N')
    N.set(20)

    src_data = np.random.rand(N.get()).astype(np.float32)
    dst_data = dp.ndarray([N], dp.float32)
    dst_data[:] = dp.float32(0)

    # SDFG with one state and two arrays
    mysdfg = SDFG('mymodexp')
    state = mysdfg.add_state()
    A = state.add_array('A', [N], dp.float32)
    B = state.add_array('B', [N], dp.float32)

    # Map over i with a tasklet computing the exponential
    _, map_entry, map_exit = state.add_mapped_tasklet(
        'mytasklet',
        {'i': '0:N'},
        {'a': Memlet.simple(A, 'i % N')},
        'b = math.exp(a)',
        {'b': Memlet.simple(B, 'i')},
    )

    # Connect the arrays to the map scope
    state.add_edge(A, None, map_entry, None, Memlet.simple(A, '0:N'))
    state.add_edge(map_exit, None, B, None, Memlet.simple(B, '0:N'))

    mysdfg(A=src_data, B=dst_data, N=N)

    diff = np.linalg.norm(np.exp(src_data) - dst_data) / N.get()
    print("Difference:", diff)
    assert diff <= 1e-5
Example #8
0
def _reduce(sdfg: SDFG,
            state: SDFGState,
            redfunction: Callable[[Any, Any], Any],
            in_array: str,
            out_array=None,
            axis=None,
            identity=None):
    """
    Add a reduce node to ``state`` that reduces ``in_array``.

    :param sdfg: SDFG holding the data descriptors.
    :param state: State in which the reduce subgraph is created.
    :param redfunction: Reduction function handed to ``state.add_reduce``.
    :param in_array: Input expression; it is used both as the container name
                     and as the source for ``parse_memlet_subset``.
    :param out_array: Output expression. If None, a temporary transient is
                      created to hold the result.
    :param axis: A single axis or a tuple/list of axes, or None.
    :param identity: Identity value handed to ``state.add_reduce``.
    :return: The name of the new transient if ``out_array`` is None,
             otherwise an empty list.
    """
    # Normalize axis into a tuple of symbolic expressions (None stays None)
    if axis is not None:
        if not isinstance(axis, (tuple, list)):
            axis = (axis, )
        axis = tuple(pystr_to_symbolic(a) for a in axis)

    # The input memlet is derived the same way in both modes
    src_name = in_array
    src_subset = parse_memlet_subset(sdfg.arrays[src_name],
                                     ast.parse(in_array).body[0].value, {})
    src_memlet = Memlet.simple(src_name, src_subset)

    if out_array is None:
        # Derive the shape of the result and allocate a transient for it
        if axis is None:
            result_shape = [1]
        else:
            remaining = copy.deepcopy(src_subset)
            remaining.pop(axis)
            result_shape = remaining.size()
        dst_name, dst_desc = sdfg.add_temp_transient(
            result_shape, sdfg.arrays[src_name].dtype,
            sdfg.arrays[src_name].storage)
        dst_memlet = Memlet.from_array(dst_name, dst_desc)
    else:
        dst_name = out_array
        dst_subset = parse_memlet_subset(sdfg.arrays[dst_name],
                                         ast.parse(out_array).body[0].value,
                                         {})
        dst_memlet = Memlet.simple(dst_name, dst_subset)

    # read -> reduce -> write
    read_node = state.add_read(src_name)
    reduce_node = state.add_reduce(redfunction, axis, identity)
    write_node = state.add_write(dst_name)
    state.add_nedge(read_node, reduce_node, src_memlet)
    state.add_nedge(reduce_node, write_node, dst_memlet)

    return dst_name if out_array is None else []
Example #9
0
def _assignop(sdfg: SDFG, state: SDFGState, op1: str, opcode: str, opname: str):
    """ Implements a general element-wise array assignment operator.

        A temporary transient with the shape, dtype and storage of ``op1`` is
        created and filled by a mapped tasklet that copies ``op1`` element by
        element. If ``opcode`` is non-empty, the write carries the
        write-conflict resolution ``lambda x, y: x <opcode> y``.

        :param sdfg: SDFG holding the data descriptors.
        :param state: State in which the mapped tasklet is created.
        :param op1: Name of the source array.
        :param opcode: Operator text used in the WCR lambda; may be empty.
        :param opname: Used to name the tasklet as ``_<opname>_``.
        :return: Name of the temporary transient.
    """
    source = sdfg.arrays[op1]
    result_name, _ = sdfg.add_temp_transient(source.shape, source.dtype,
                                             source.storage)

    # One map parameter per dimension: __i0, __i1, ...
    params = ['__i%d' % dim for dim in range(len(source.shape))]
    index_expr = ','.join(params)
    map_ranges = {
        param: '0:%s' % extent
        for param, extent in zip(params, source.shape)
    }

    if opcode:
        out_memlet = Memlet.simple(result_name,
                                   index_expr,
                                   wcr_str='lambda x, y: x %s y' % opcode)
    else:
        out_memlet = Memlet.simple(result_name, index_expr)

    state.add_mapped_tasklet("_%s_" % opname,
                             map_ranges,
                             {'__in1': Memlet.simple(op1, index_expr)},
                             '__out = __in1', {'__out': out_memlet},
                             external_edges=True)
    return result_name
Example #10
0
def pure_graph(implementation, dtype, veclen):
    """
    Build an SDFG containing a single BLAS ``Dot`` library node.

    The inputs ``x`` and ``y`` are arrays of ``n / veclen`` vector elements
    of width ``veclen``; the result ``r`` is a single element of ``dtype``.

    :param implementation: Value assigned to the node's ``implementation``.
    :param dtype: Base data type; its ``ctype`` is part of the SDFG name.
    :param veclen: Vector width used for the input arrays.
    :return: The constructed SDFG.
    """
    sdfg_name = f"dot_{implementation}_{dtype.ctype}_w{veclen}"
    sdfg = dace.SDFG(sdfg_name)

    state = sdfg.add_state("dot")

    n = dace.symbol("n")

    vtype = dace.vector(dtype, veclen)

    sdfg.add_array("x", [n / veclen], vtype)
    sdfg.add_array("y", [n / veclen], vtype)
    sdfg.add_array("r", [1], dtype)

    x = state.add_read("x")
    y = state.add_read("y")
    result = state.add_write("r")

    dot_node = blas.Dot("dot")
    dot_node.implementation = implementation
    dot_node.n = n

    state.add_memlet_path(x, dot_node, dst_conn="_x", memlet=Memlet(f"x[0:{n}/{veclen}]"))
    state.add_memlet_path(y, dot_node, dst_conn="_y", memlet=Memlet(f"y[0:{n}/{veclen}]"))
    state.add_memlet_path(dot_node, result, src_conn="_result", memlet=Memlet("r[0]"))

    return sdfg
Example #11
0
def test():
    """
    Chain two tasklets (``b = 5*a`` followed by ``d = 2*c``) inside one map
    and check that the output equals ten times the input.
    """
    print('SDFG consecutive tasklet test')

    # Symbolic size and host buffers
    N = dp.symbol('N')
    N.set(20)
    src_data = dp.ndarray([N], dp.int32)
    dst_data = dp.ndarray([N], dp.int32)
    src_data[:] = dp.int32(5)
    dst_data[:] = dp.int32(0)

    # SDFG with a single state
    mysdfg = SDFG('ctasklet')
    state = mysdfg.add_state()
    node_a = state.add_array('A', [N], dp.int32)
    node_b = state.add_array('B', [N], dp.int32)

    map_entry, map_exit = state.add_map('mymap', {'i': '0:N'})

    first = state.add_tasklet('mytasklet', {'a'}, {'b'}, 'b = 5*a')
    state.add_edge(map_entry, None, first, 'a', Memlet.simple(node_a, 'i'))

    second = state.add_tasklet('mytasklet2', {'c'}, {'d'}, 'd = 2*c')
    # The two tasklets are linked directly with an empty memlet.
    state.add_edge(first, 'b', second, 'c', Memlet())
    state.add_edge(second, 'd', map_exit, None, Memlet.simple(node_b, 'i'))

    # Edges between the arrays and the map scope
    state.add_edge(node_a, None, map_entry, None,
                   Memlet.simple(node_a, '0:N'))
    state.add_edge(map_exit, None, node_b, None,
                   Memlet.simple(node_b, '0:N'))

    mysdfg(A=src_data, B=dst_data, N=N)

    diff = np.linalg.norm(10 * src_data - dst_data) / N.get()
    print("Difference:", diff)
    assert diff <= 1e-5
Example #12
0
def _gather(pv: 'ProgramVisitor',
            sdfg: SDFG,
            state: SDFGState,
            in_buffer: str,
            out_buffer: str,
            root: Union[str, sp.Expr, Number] = 0):
    """
    Add an MPI ``Gather`` library node to ``state``.

    :param pv: Program visitor, forwarded to ``_define_local_scalar``.
    :param sdfg: SDFG holding the data descriptors.
    :param state: State in which nodes and edges are created.
    :param in_buffer: Name of the container connected to ``_inbuffer``.
    :param out_buffer: Name of the container connected to ``_outbuffer``.
    :param root: Name of an existing container holding the root, or a value
                 that a small tasklet writes into a fresh local scalar.
    :return: None
    """
    from dace.libraries.mpi.nodes.gather import Gather

    gather_node = Gather('_Gather_')
    src_desc = sdfg.arrays[in_buffer]
    dst_desc = sdfg.arrays[out_buffer]
    src_node = state.add_read(in_buffer)
    dst_node = state.add_write(out_buffer)

    root_is_container = isinstance(root, str) and root in sdfg.arrays.keys()
    if root_is_container:
        root_node = state.add_read(root)
    else:
        # Materialize the root value in a scalar that shares the input
        # buffer's storage type, initialized by a dedicated tasklet.
        scalar_name = _define_local_scalar(pv, sdfg, state, dace.int32,
                                           src_desc.storage)
        root_node = state.add_access(scalar_name)
        init_tasklet = state.add_tasklet('_set_root_', {}, {'__out'},
                                         '__out = {}'.format(root))
        state.add_edge(init_tasklet, '__out', root_node, None,
                       Memlet.simple(scalar_name, '0'))

    state.add_edge(src_node, None, gather_node, '_inbuffer',
                   Memlet.from_array(in_buffer, src_desc))
    state.add_edge(root_node, None, gather_node, '_root',
                   Memlet.simple(root_node.data, '0'))
    state.add_edge(gather_node, '_outbuffer', dst_node, None,
                   Memlet.from_array(out_buffer, dst_desc))

    return None
Example #13
0
def make_read_x():
    """
    Build the ``spmv_read_x`` SDFG.

    Inside the loop body produced by ``make_iteration_space`` a tasklet pops
    an index from ``col_pipe``, uses it to index ``x_mem`` and pushes the
    value into ``compute_pipe``.

    :return: The constructed SDFG.
    """
    sdfg = SDFG("spmv_read_x")

    # Only the loop body is populated; the pre/post states are not touched.
    _, loop_body, _ = make_iteration_space(sdfg)

    x_array = loop_body.add_array("x_mem", (W, ),
                                  dtype,
                                  storage=StorageType.FPGA_Global)
    index_stream = loop_body.add_stream("col_pipe",
                                        itype,
                                        storage=StorageType.FPGA_Local)
    value_stream = loop_body.add_stream("compute_pipe",
                                        dtype,
                                        storage=StorageType.FPGA_Local)

    reader = loop_body.add_tasklet(
        "read_x",
        {"x_in", "col_in"},
        {"x_out"},
        "x_out = x_in[col_in]",
    )

    # Inputs: the whole x array plus one index from the stream
    loop_body.add_memlet_path(x_array,
                              reader,
                              dst_conn="x_in",
                              memlet=Memlet.simple(x_array, "0:W"))
    loop_body.add_memlet_path(index_stream,
                              reader,
                              dst_conn="col_in",
                              memlet=Memlet.simple(index_stream, "0"))

    # Output: the selected value
    loop_body.add_memlet_path(reader,
                              value_stream,
                              src_conn="x_out",
                              memlet=Memlet.simple(value_stream, "0"))

    return sdfg
Example #14
0
def test():
    """
    Build an SDFG with a single mapped tasklet computing ``b = 5*a``, run it
    and check the output against ``5 * input``.
    """
    # Symbolic size and host buffers
    N = dp.symbol('N')
    N.set(20)
    src_data = dp.ndarray([N], dp.int32)
    dst_data = dp.ndarray([N], dp.int32)
    src_data[:] = dp.int32(5)
    dst_data[:] = dp.int32(0)

    # SDFG with a single state. The Python variable names below are
    # independent of the array names 'A' and 'B' used inside the SDFG.
    mysdfg = SDFG('mysdfg')
    state = mysdfg.add_state()
    node_a = state.add_array('A', [N], dp.int32)
    node_b = state.add_array('B', [N], dp.int32)

    # Convenience API: map + tasklet + inner edges in one call
    _, map_entry, map_exit = state.add_mapped_tasklet(
        'mytasklet',
        {'i': '0:N'},
        {'a': Memlet.simple(node_a, 'i')},
        'b = 5*a',
        {'b': Memlet.simple(node_b, 'i')},
    )
    # The explicit equivalent would be:
    #   map_entry, map_exit = state.add_map('mymap', dict(i='0:N'))
    #   tasklet = state.add_tasklet('mytasklet', {'a'}, {'b'}, 'b = 5*a')
    #   state.add_edge(map_entry, None, tasklet, 'a', Memlet.simple(A_, 'i'))
    #   state.add_edge(tasklet, 'b', map_exit, None, Memlet.simple(B_, 'i'))

    # Edges between the arrays and the map scope
    state.add_edge(node_a, None, map_entry, None,
                   Memlet.simple(node_a, '0:N'))
    state.add_edge(map_exit, None, node_b, None,
                   Memlet.simple(node_b, '0:N'))

    mysdfg(A=src_data, B=dst_data, N=N)

    diff = np.linalg.norm(5 * src_data - dst_data) / N.get()
    print("Difference:", diff)
    assert diff <= 1e-5
Example #15
0
def test_nested_symbol_type():
    """
    Nest an SDFG that writes the ``float32`` symbol ``s`` to its output into
    an outer SDFG declaring the same symbol, then check that the value passed
    at call time reaches the output array.
    """
    # Inner SDFG: a tasklet copies the symbol s into 'output'
    inner_sdfg = dace.SDFG("test_nested_symbol_type")
    inner_state = inner_sdfg.add_state("test_state")
    inner_sdfg.add_symbol("s", dace.float32)
    inner_sdfg.add_array('output', shape=[1], dtype=dace.float32)

    inner_write = inner_state.add_write('output')
    symbol_tasklet = inner_state.add_tasklet('bugs', [], ['out'], 'out = s')
    inner_state.add_memlet_path(symbol_tasklet,
                                inner_write,
                                src_conn='out',
                                memlet=Memlet.simple(inner_write.data, "0"))

    # Outer SDFG: wraps the inner one and forwards its output to 'data'
    outer_sdfg = dace.SDFG("nested_symbol_type")
    outer_state = outer_sdfg.add_state("outer_state")
    outer_sdfg.add_symbol("s", dace.float32)
    outer_sdfg.add_array('data', shape=[1], dtype=dace.float32)

    outer_write = outer_state.add_write('data')
    nested_node = outer_state.add_nested_sdfg(inner_sdfg, outer_sdfg, {},
                                              {'output'})
    outer_state.add_memlet_path(nested_node,
                                outer_write,
                                src_conn='output',
                                memlet=Memlet.simple(outer_write.data, "0"))

    compiled = outer_sdfg.compile()

    result = np.zeros(1, dtype=np.float32)
    compiled(data=result, s=np.float32(1.5))

    print("res:", result[0])
    assert result[0] == np.float32(1.5)
Example #16
0
 def expressions():
     """
     Build the list of pattern graphs: a single state in which
     ``_map_entry`` is connected to both ``_node1`` and ``_node2`` by
     empty memlets.

     :return: A one-element list containing that state.
     """
     pattern = sd.SDFGState()
     for target in (DeduplicateAccess._node1, DeduplicateAccess._node2):
         pattern.add_nedge(DeduplicateAccess._map_entry, target, Memlet())
     return [pattern]
Example #17
0
def nccl_send(pv: 'ProgramVisitor',
              sdfg: SDFG,
              state: SDFGState,
              in_buffer: str,
              peer: symbolic.SymbolicType = 0,
              group_handle: str = None):
    """
    Add an NCCL ``Send`` library node to ``state`` and connect its input.

    :param pv: Program visitor, forwarded to ``_define_local_scalar``.
    :param sdfg: SDFG holding the data descriptors.
    :param state: State in which nodes and edges are created.
    :param in_buffer: Name of the container to send, or a ``(name, range)``
                      tuple restricting the memlet to ``range``.
    :param peer: Passed through to the ``Send`` library node.
    :param group_handle: Optional name of a group-handle container. If the
                         name is not yet in ``sdfg.arrays``, a new local
                         scalar is defined and only written by the node;
                         otherwise the existing container is read and written.
    :return: An empty list.
    """
    inputs = {"_inbuffer"}
    outputs = set()

    if isinstance(group_handle, str):
        gh_start = False
        if group_handle in sdfg.arrays.keys():
            gh_name = group_handle
            gh_out = state.add_access(gh_name)
            gh_in = state.add_access(gh_name)
            inputs.add("_group_handle")
        else:
            # No such container yet: this node starts the group.
            gh_start = True
            gh_name = _define_local_scalar(pv, sdfg, state, dace.int32,
                                           dtypes.StorageType.GPU_Global)
            gh_out = state.add_access(gh_name)
        outputs.add("_group_handle")

    libnode = Send(inputs=inputs, outputs=outputs, peer=peer)

    if isinstance(group_handle, str):
        gh_memlet = Memlet.simple(gh_name, '0')
        if not gh_start:
            state.add_edge(gh_in, None, libnode, "_group_handle", gh_memlet)
        state.add_edge(libnode, "_group_handle", gh_out, None, gh_memlet)

    in_range = None
    if isinstance(in_buffer, tuple):
        in_name, in_range = in_buffer
    else:
        in_name = in_buffer

    desc = sdfg.arrays[in_name]
    # The data input connector of this node is '_inbuffer' (see ``inputs``
    # above); give it an explicit pointer type and leave all others as-is.
    conn = libnode.in_connectors
    conn = {
        c: (dtypes.pointer(desc.dtype) if c == '_inbuffer' else t)
        for c, t in conn.items()
    }
    libnode.in_connectors = conn
    in_node = state.add_read(in_name)

    if in_range:
        buf_mem = Memlet.simple(in_name, in_range)
    else:
        buf_mem = Memlet.from_array(in_name, desc)

    state.add_edge(in_node, None, libnode, '_inbuffer', buf_mem)

    return []
Example #18
0
def test():
    """
    Check that specializing an SDFG's symbols to constants removes the
    'Dynamic' marker from the generated code, then run the specialized SDFG
    and compare its output with ``np.exp`` on the interior rows.
    """
    print('Constant specialization test')

    N = dp.symbol('N')
    M = dp.symbol('M')
    N.set(20)
    M.set(30)

    # Rows 1..N-2 and all columns
    row_range = '1:N-1'
    col_range = '0:M'
    full_range = '1:N-1,0:M'

    src_data = np.random.rand(N.get(), M.get()).astype(np.float32)
    dst_data = dp.ndarray([N, M], dtype=dp.float32)
    dst_data[:] = dp.float32(0)

    ##########################################################################
    spec_sdfg = SDFG('spectest')
    state = spec_sdfg.add_state()
    node_a = state.add_array('A', [N, M], dp.float32)
    node_tmp = state.add_transient('At', [N - 2, M], dp.float32)
    node_b = state.add_array('B', [N, M], dp.float32)

    # Copy A into the transient, then map exp over it into B
    state.add_edge(node_a, None, node_tmp, None,
                   Memlet.simple(node_a, full_range))
    _, map_entry, map_exit = state.add_mapped_tasklet(
        'compute',
        {'i': row_range, 'j': col_range},
        {'a': Memlet.simple(node_tmp, 'i-1,j')},
        'b = math.exp(a)',
        {'b': Memlet.simple(node_b, 'i,j')},
    )
    state.add_edge(node_tmp, None, map_entry, None,
                   Memlet.simple(node_tmp, full_range))
    state.add_edge(map_exit, None, node_b, None,
                   Memlet.simple(node_b, full_range))

    spec_sdfg.fill_scope_connectors()
    dp.propagate_memlets_sdfg(spec_sdfg)
    spec_sdfg.validate()
    ##########################################################################

    # Before specialization the generated code mentions 'Dynamic'
    generic_code = spec_sdfg.generate_code()
    assert 'Dynamic' in generic_code[0].code

    # After specialization it must not
    spec_sdfg.specialize(dict(N=N, M=M))
    specialized_code = spec_sdfg.generate_code()
    assert 'Dynamic' not in specialized_code[0].code

    func = spec_sdfg.compile()
    func(A=src_data, B=dst_data, N=N, M=M)

    expected = np.exp(src_data[1:(N.get() - 1), 0:M.get()])
    diff = np.linalg.norm(expected - dst_data[1:-1, :]) / N.get()
    print("Difference:", diff)
    assert diff <= 1e-5
def make_nested_sdfg():
    """
    Build the ``vol_propagation_nested`` SDFG.

    The SDFG has four states forming a loop over ``i`` from 0 while
    ``i < loop_bound``. The ``assign`` state moves a value from the
    ``IN_bound`` stream into the ``loop_bound`` scalar; the ``for`` state
    reads ``IN_a`` and writes its first element to ``OUT_stream``.

    :return: The constructed SDFG.
    """
    sdfg = dace.SDFG('vol_propagation_nested')

    init_state = sdfg.add_state('assign')
    guard = sdfg.add_state('guard')
    body = sdfg.add_state('for')
    exit_state = sdfg.add_state('endfor')

    # Loop control flow: init -> guard -> (body -> guard)* -> exit
    sdfg.add_edge(init_state, guard, InterstateEdge(assignments={'i': '0'}))

    enter_cond = CodeProperty.from_string('i < loop_bound',
                                          language=Language.Python)
    sdfg.add_edge(guard, body, InterstateEdge(condition=enter_cond))

    sdfg.add_edge(body, guard, InterstateEdge(assignments={'i': 'i+1'}))

    leave_cond = CodeProperty.from_string('not (i < loop_bound)',
                                          language=Language.Python)
    sdfg.add_edge(guard, exit_state, InterstateEdge(condition=leave_cond))

    # Init state: stream -> scalar holding the loop bound
    bound_stream = init_state.add_stream('IN_bound',
                                         dace.int32,
                                         storage=StorageType.FPGA_Local)
    bound_scalar = init_state.add_scalar('loop_bound',
                                         dace.int32,
                                         transient=True,
                                         storage=StorageType.FPGA_Registers)
    init_state.add_memlet_path(bound_stream,
                               bound_scalar,
                               memlet=Memlet.simple(bound_scalar, '0'))

    # Loop body: array -> tasklet -> stream
    src_array = body.add_array('IN_a', [N],
                               dace.int32,
                               storage=StorageType.FPGA_Global)
    dst_stream = body.add_stream('OUT_stream',
                                 dace.int32,
                                 storage=StorageType.FPGA_Local)
    compute = body.add_tasklet('compute', {'_IN_a'}, {'_OUT_stream'},
                               '_OUT_stream = _IN_a[0]')
    body.add_memlet_path(src_array,
                         compute,
                         dst_conn='_IN_a',
                         memlet=Memlet.simple(src_array, '0:N'))
    body.add_memlet_path(compute,
                         dst_stream,
                         src_conn='_OUT_stream',
                         memlet=Memlet.simple(dst_stream, '0'))

    return sdfg
Example #20
0
def nccl_recv(pv: 'ProgramVisitor',
              sdfg: SDFG,
              state: SDFGState,
              out_buffer: str,
              peer: symbolic.SymbolicType = 0,
              group_handle: str = None):
    """
    Add an NCCL ``Recv`` library node to ``state`` and connect its output.

    :param pv: Program visitor, forwarded to ``_define_local_scalar``.
    :param sdfg: SDFG holding the data descriptors.
    :param state: State in which nodes and edges are created.
    :param out_buffer: Name of the receiving container, or a
                       ``(name, range)`` tuple restricting the memlet to
                       ``range``.
    :param peer: Passed through to the ``Recv`` library node.
    :param group_handle: Optional name of a group-handle container. If the
                         name is not yet in ``sdfg.arrays``, a new local
                         scalar is defined and only written by the node;
                         otherwise the existing container is read and written.
    :return: An empty list.
    :raises ValueError: If ``out_buffer`` is neither a tuple nor the name of
                        a container in ``sdfg.arrays``.
    """
    inputs = set()
    outputs = {"_outbuffer"}

    if isinstance(group_handle, str):
        gh_start = False
        if group_handle in sdfg.arrays.keys():
            gh_name = group_handle
            gh_out = state.add_access(gh_name)
            gh_in = state.add_access(gh_name)
            inputs.add("_group_handle")
        else:
            # No such container yet: this node starts the group.
            gh_start = True
            gh_name = _define_local_scalar(pv, sdfg, state, dace.int32,
                                           dtypes.StorageType.GPU_Global)
            gh_out = state.add_access(gh_name)
        outputs.add("_group_handle")

    libnode = Recv(inputs=inputs, outputs=outputs, peer=peer)

    if isinstance(group_handle, str):
        gh_memlet = Memlet.simple(gh_name, '0')
        if not gh_start:
            state.add_edge(gh_in, None, libnode, "_group_handle", gh_memlet)
        state.add_edge(libnode, "_group_handle", gh_out, None, gh_memlet)

    out_range = None
    if isinstance(out_buffer, tuple):
        out_name, out_range = out_buffer
        out_node = state.add_write(out_name)
    elif isinstance(out_buffer, str) and out_buffer in sdfg.arrays.keys():
        out_name = out_buffer
        out_node = state.add_write(out_name)
    else:
        raise ValueError(
            "NCCL_Recv out_buffer must be an array, or a an array range tuple.")

    if out_range:
        out_mem = Memlet.simple(out_name, out_range)
    else:
        # No explicit range: cover the whole container, the same way
        # nccl_send builds its buffer memlet. For a single-element container
        # this is identical to the subset '0'.
        out_mem = Memlet.from_array(out_name, sdfg.arrays[out_name])

    state.add_edge(libnode, '_outbuffer', out_node, None, out_mem)

    return []
Example #21
0
def nccl_reduce(pv: 'ProgramVisitor',
                sdfg: SDFG,
                state: SDFGState,
                redfunction: Callable[[Any, Any], Any],
                in_buffer: str,
                out_buffer: Union[str, None] = None,
                root: str = None,
                group_handle: str = None):
    """
    Add an NCCL ``Reduce`` library node to ``state``.

    :param pv: Program visitor, forwarded to ``_define_local_scalar``.
    :param sdfg: SDFG holding the data descriptors.
    :param state: State in which nodes and edges are created.
    :param redfunction: Reduction function, passed to the node as ``wcr``.
    :param in_buffer: Name of the input container.
    :param out_buffer: Name of the output container; if None the reduction
                       is in-place on ``in_buffer``.
    :param root: Passed through to the ``Reduce`` library node.
    :param group_handle: Optional name of a group-handle container. If the
                         name is not yet in ``sdfg.arrays``, a new local
                         scalar is defined and only written by the node;
                         otherwise the existing container is read and written.
    :return: An empty list.
    """
    inputs = {"_inbuffer"}
    outputs = {"_outbuffer"}

    has_group_handle = isinstance(group_handle, str)
    handle_exists = False
    if has_group_handle:
        handle_exists = group_handle in sdfg.arrays.keys()
        if handle_exists:
            gh_name = group_handle
            gh_out = state.add_access(gh_name)
            gh_in = state.add_access(gh_name)
            inputs.add("_group_handle")
        else:
            # No such container yet: define it; the node only writes it.
            gh_name = _define_local_scalar(pv, sdfg, state, dace.int32,
                                           dtypes.StorageType.GPU_Global)
            gh_out = state.add_access(gh_name)
        outputs.add("_group_handle")

    libnode = Reduce(inputs=inputs,
                     outputs=outputs,
                     wcr=redfunction,
                     root=root)

    if has_group_handle:
        gh_memlet = Memlet.simple(gh_name, '0')
        if handle_exists:
            state.add_edge(gh_in, None, libnode, "_group_handle", gh_memlet)
        state.add_edge(libnode, "_group_handle", gh_out, None, gh_memlet)

    # Without an explicit output the operation is in-place.
    target = in_buffer if out_buffer is None else out_buffer

    src_node = state.add_read(in_buffer)
    dst_node = state.add_write(target)

    state.add_edge(src_node, None, libnode, '_inbuffer', Memlet(in_buffer))
    state.add_edge(libnode, '_outbuffer', dst_node, None, Memlet(target))

    return []
Example #22
0
def pure_graph(dtype,
               transposed,
               expansion,
               veclen,
               alpha,
               beta,
               expansion_args=None):
    """
    Build an SDFG containing a single BLAS ``Gemv`` library node.

    ``A`` has shape ``[m, n / veclen]`` with vector elements of width
    ``veclen``. Depending on ``transposed``, either ``x`` or ``y`` uses the
    vector type and the other the scalar ``dtype``.

    :param dtype: Base data type.
    :param transposed: Passed to the node as ``transA``; also selects the
                       sizes and element types of ``x`` and ``y``.
    :param expansion: Value assigned to the node's ``implementation``.
    :param veclen: Vector width.
    :param alpha: Passed through to the ``Gemv`` node.
    :param beta: Passed through to the ``Gemv`` node.
    :param expansion_args: If not None, the node is expanded immediately
                           with these keyword arguments.
    :return: The constructed SDFG.
    """
    sdfg = dace.SDFG(f"gemv_{expansion}_{dtype}_{transposed}_w{veclen}")

    m = dace.symbol("m")
    n = dace.symbol("n") / veclen
    vtype = dace.vector(dtype, veclen)

    state = sdfg.add_state("gemv_compute")

    A_rows, A_cols = m, n
    if transposed:
        x_size, x_type = m, dtype
        y_size, y_type = n, vtype
    else:
        x_size, x_type = n, vtype
        y_size, y_type = m, dtype

    sdfg.add_array("A", shape=[A_rows, A_cols], dtype=vtype)
    sdfg.add_array("x", shape=[x_size], dtype=x_type)
    sdfg.add_array("y", shape=[y_size], dtype=y_type)

    a_read = state.add_read("A")
    x_read = state.add_read("x")
    y_write = state.add_write("y")

    gemv_node = blas.Gemv("gemv", transA=transposed, alpha=alpha, beta=beta)
    gemv_node.implementation = expansion

    # Inputs
    state.add_memlet_path(a_read,
                          gemv_node,
                          dst_conn="_A",
                          memlet=Memlet(f"A[0:{A_rows}, 0:{A_cols}]"))
    state.add_memlet_path(x_read,
                          gemv_node,
                          dst_conn="_x",
                          memlet=Memlet(f"x[0:{x_size}]"))
    # Output
    state.add_memlet_path(gemv_node,
                          y_write,
                          src_conn="_y",
                          memlet=Memlet(f"y[0:{y_size}]"))

    if expansion_args is not None:
        gemv_node.expand(sdfg, state, **expansion_args)

    return sdfg
Example #23
0
def _block_gather(pv: 'ProgramVisitor',
                  sdfg: SDFG,
                  state: SDFGState,
                  in_buffer: str,
                  out_buffer: str,
                  gather_grid: str,
                  reduce_grid: str = None,
                  correspondence: Sequence[Integral] = None):
    """ Block-gathers an Array using process-grids, sub-arrays, and the BlockGather library node.
        This method currently does not support Array slices and imperfect tiling.
        :param pv: Program visitor, forwarded to ``_subarray``.
        :param sdfg: SDFG holding the data descriptors.
        :param state: State in which nodes and edges are created.
        :param in_buffer: Name of the (local) Array descriptor.
        :param out_buffer: Name of the (global) Array descriptor.
        :param gather_grid: Name of the sub-grid used for gathering the Array (reduction group leaders).
        :param reduce_grid: Name of the sub-grid used for broadcasting the Array (reduction groups).
        :param correspondence: Matching of the array/sub-array's dimensions to the process-grid's dimensions.
        :return: Name of the new sub-array descriptor.
        :raises ValueError: If the two buffers have different data types.
    """
    # Both buffers must share one element type.
    if sdfg.arrays[in_buffer].dtype != sdfg.arrays[out_buffer].dtype:
        raise ValueError("Input/output buffer datatypes must match!")

    # Note the argument order: the output buffer comes first.
    subarray_name = _subarray(pv,
                              sdfg,
                              state,
                              out_buffer,
                              in_buffer,
                              process_grid=gather_grid,
                              correspondence=correspondence)

    from dace.libraries.mpi import BlockGather
    gather_node = BlockGather('_BlockGather_', subarray_name, gather_grid,
                              reduce_grid)

    local_desc = sdfg.arrays[in_buffer]
    global_desc = sdfg.arrays[out_buffer]
    local_node = state.add_read(in_buffer)
    global_node = state.add_write(out_buffer)

    # Whole-array memlets on both sides of the library node
    state.add_edge(local_node, None, gather_node, '_inp_buffer',
                   Memlet.from_array(in_buffer, local_desc))
    state.add_edge(gather_node, '_out_buffer', global_node, None,
                   Memlet.from_array(out_buffer, global_desc))

    return subarray_name
Example #24
0
def make_compute_sdfg():
    """ Builds the "spmv_compute" SDFG.
        In the `body` state obtained from `make_iteration_space`, the streams
        "a_pipe" and "x_pipe" and the register scalar "b_buffer" are connected
        to the inputs of a nested SDFG, whose "b_out" output is written back
        to "b_buffer". In the `post_state`, "b_buffer" is connected to the
        stream "b_pipe".
        :return: The constructed SDFG.
    """
    sdfg = SDFG("spmv_compute")

    # The first state returned by make_iteration_space is not used here.
    _, body, post_state = make_iteration_space(sdfg)

    # Options shared by every stream / register scalar created below.
    local_stream = dict(storage=StorageType.FPGA_Local)
    register_scalar = dict(transient=True,
                           storage=StorageType.FPGA_Registers)

    a_pipe = body.add_stream("a_pipe", dtype, **local_stream)
    x_pipe = body.add_stream("x_pipe", dtype, **local_stream)
    b_buffer_in = body.add_scalar("b_buffer", dtype, **register_scalar)
    b_buffer_out = body.add_scalar("b_buffer", dtype, **register_scalar)

    tasklet = body.add_nested_sdfg(make_compute_nested_sdfg(), sdfg,
                                   {"a_in", "x_in", "b_in"}, {"b_out"})

    # Inputs of the nested SDFG: each source is read at index "0".
    input_edges = (
        (a_pipe, "a_in"),
        (b_buffer_in, "b_in"),
        (x_pipe, "x_in"),
    )
    for source, connector in input_edges:
        body.add_memlet_path(source,
                             tasklet,
                             dst_conn=connector,
                             memlet=Memlet.simple(source, "0"))

    # Output of the nested SDFG goes back into the register scalar.
    body.add_memlet_path(tasklet,
                         b_buffer_out,
                         src_conn="b_out",
                         memlet=Memlet.simple(b_buffer_out, "0"))

    # Post state: forward the accumulated scalar to the output stream.
    b_buffer_post_in = post_state.add_scalar("b_buffer", dtype,
                                             **register_scalar)
    b_pipe = post_state.add_stream("b_pipe", dtype, **local_stream)
    post_state.add_memlet_path(b_buffer_post_in,
                               b_pipe,
                               memlet=Memlet.simple(b_pipe, "0"))

    return sdfg
Example #25
0
def make_write_sdfg():
    """ Builds the "spmv_write" SDFG.
        The state machine is begin -> entry -> body -> entry ... -> end:
        `h` is set to 0 when leaving "begin", "entry" branches to "body" while
        `h < H` and to "end" once `h >= H`, and `h` is incremented on the way
        back from "body" to "entry". In "body", the stream "b_pipe" is
        connected to element `h` of the array "b_mem".
        :return: The constructed SDFG.
    """
    sdfg = SDFG("spmv_write")

    # States are created in this exact order.
    begin, entry, state, end = (sdfg.add_state(label)
                                for label in ("begin", "entry", "body", "end"))

    def guarded(expression):
        # Inter-state edge taken only when the Python condition holds.
        return InterstateEdge(condition=CodeProperty.from_string(
            expression, language=Language.Python))

    sdfg.add_edge(begin, entry, InterstateEdge(assignments={"h": "0"}))
    sdfg.add_edge(entry, state, guarded("h < H"))
    sdfg.add_edge(entry, end, guarded("h >= H"))
    sdfg.add_edge(state, entry, InterstateEdge(assignments={"h": "h + 1"}))

    b_pipe = state.add_stream("b_pipe",
                              dtype,
                              storage=StorageType.FPGA_Local)
    b_mem = state.add_array("b_mem", (H, ),
                            dtype,
                            storage=StorageType.FPGA_Global)

    state.add_memlet_path(b_pipe, b_mem, memlet=Memlet.simple(b_mem, "h"))

    return sdfg
Example #26
0
def parse_memlet(visitor, src: MemletType, dst: MemletType,
                 defined_arrays_and_symbols: Dict[str, data.Data]):
    """ Parses a memlet given its source and destination AST expressions.
        Exactly one side is expected to be a local variable, i.e., a plain
        name that is not in `defined_arrays_and_symbols`; the other side is
        parsed with `ParseMemlet`.
        :param visitor: Visitor passed on to `ParseMemlet` and to raised
                        syntax errors.
        :param src: Source expression of the memlet.
        :param dst: Destination expression of the memlet.
        :param defined_arrays_and_symbols: Names that are already defined.
        :return: A 2-tuple of (local variable name, Memlet).
        :raises DaceSyntaxError: If both sides are local variables.
        :raises NotImplementedError: If neither side is a local variable.
    """
    def _is_local(node):
        return (isinstance(node, ast.Name)
                and rname(node) not in defined_arrays_and_symbols)

    localvar = None
    srcexpr = None
    dstexpr = None

    # Source side is handled first, then the destination side.
    if _is_local(src):
        localvar = rname(src)
    else:
        srcexpr = ParseMemlet(visitor, defined_arrays_and_symbols, src)

    if not _is_local(dst):
        dstexpr = ParseMemlet(visitor, defined_arrays_and_symbols, dst)
    elif localvar is None:
        localvar = rname(dst)
    else:
        raise DaceSyntaxError(
            visitor, src,
            'Memlet source and destination cannot both be local variables')

    if srcexpr is not None and dstexpr is not None:
        # Both sides are data expressions: this would need two memlets.
        raise NotImplementedError

    expr = dstexpr if srcexpr is None else srcexpr

    memlet = Memlet(expr.name,
                    expr.accesses,
                    expr.subset,
                    1,
                    wcr=expr.wcr)
    return localvar, memlet
Example #27
0
def _cart_create(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState,
                 dims: ShapeType):
    """ Creates a process-grid and adds it to the DaCe program.
        The process-grid is implemented with
        [MPI_Cart_create](https://www.mpich.org/static/docs/latest/www3/MPI_Cart_create.html).
        :param pv: Program visitor (not used by this function).
        :param sdfg: SDFG that the process-grid is added to.
        :param state: State that receives the Dummy node and its write.
        :param dims: Shape of the process-grid (see `dims` parameter of `MPI_Cart_create`), e.g., [2, 3, 3].
        :return: Name of the new process-grid descriptor.
    """
    pgrid_name = sdfg.add_pgrid(dims)
    ndims = len(dims)

    # Dummy tasklet adds MPI variables to the program's state.
    from dace.libraries.mpi import Dummy
    declared_variables = (
        ('MPI_Comm', 'comm'),
        ('MPI_Group', 'group'),
        ('int', f'coords[{ndims}]'),
        ('int', f'dims[{ndims}]'),
        ('int', 'rank'),
        ('int', 'size'),
        ('bool', 'valid'),
    )
    declarations = [
        f'{ctype} {pgrid_name}_{suffix};'
        for ctype, suffix in declared_variables
    ]
    dummy_node = Dummy(pgrid_name, declarations)
    state.add_node(dummy_node)

    # Pseudo-writing to a dummy variable to avoid removal of Dummy node by transformations.
    _, scalar_desc = sdfg.add_scalar(pgrid_name, dace.int32, transient=True)
    write_node = state.add_write(pgrid_name)
    dummy_memlet = Memlet.from_array(pgrid_name, scalar_desc)
    state.add_edge(dummy_node, '__out', write_node, None, dummy_memlet)

    return pgrid_name
Example #28
0
def _propagate_node(dfg_state, node):
    """ Recomputes the memlets on the external edges of a scope node.
        For an entry node the external edges are the incoming 'IN_*' edges and
        the internal ones are the outgoing 'OUT_*' edges; for any other node
        the roles are swapped. Every non-empty external memlet is replaced by
        the propagated memlet of the internal edge that refers to the same
        data; empty external memlets are replaced by a fresh empty Memlet.
        :param dfg_state: State (graph) that contains the node.
        :param node: The scope node whose external edges are updated.
    """
    def _incoming():
        return [
            e for e in dfg_state.in_edges(node)
            if e.dst_conn and e.dst_conn.startswith('IN_')
        ]

    def _outgoing():
        return [
            e for e in dfg_state.out_edges(node)
            if e.src_conn and e.src_conn.startswith('OUT_')
        ]

    if isinstance(node, nodes.EntryNode):
        inner_edges = _outgoing()
        outer_edges = _incoming()
    else:
        inner_edges = _incoming()
        outer_edges = _outgoing()

    for outer in outer_edges:
        if not outer.data.is_empty():
            # First internal edge that carries the same data container.
            match = next(inner for inner in inner_edges
                         if inner.data.data == outer.data.data)
            replacement = propagate_memlet(dfg_state, match.data, node, True)
        else:
            replacement = Memlet()
        # The memlet is swapped in place through the edge's private field.
        outer._data = replacement
Example #29
0
def _add_astmemlet_edge(sdfg,
                        state,
                        src_node,
                        src_conn,
                        dst_node,
                        dst_conn,
                        ast_memlet,
                        data=None,
                        wcr=None,
                        wcr_identity=None):
    """ Adds an edge to `state` whose memlet is built from `ast_memlet`.
        If the subset of `ast_memlet` has indirection, the work is delegated
        to `add_indirection_subgraph` and no direct edge is added.
        :param sdfg: SDFG passed to `add_indirection_subgraph`.
        :param state: State that receives the edge.
        :param src_node: Source node of the edge.
        :param src_conn: Source connector of the edge.
        :param dst_node: Destination node of the edge.
        :param dst_conn: Destination connector of the edge.
        :param ast_memlet: Object providing `dataname`, `num_accesses`,
                           `subset` and `veclen` for the new memlet.
        :param data: Must be None unless the subset has indirection.
        :param wcr: Passed through to the Memlet constructor.
        :param wcr_identity: Passed through to the Memlet constructor.
        :raises RuntimeError: If both nodes have equal `data` attributes.
        :raises NotImplementedError: If `data` is given and the subset has
                                     no indirection.
    """
    # Nodes without a `data` attribute simply skip this check.
    try:
        same_descriptor = src_node.data == dst_node.data
        if same_descriptor:
            message = ("Added edge connection data nodes "
                       "with same descriptor: {} to {}").format(
                           src_node, dst_node)
            raise RuntimeError(message)
    except AttributeError:
        pass

    if _subset_has_indirection(ast_memlet.subset):
        add_indirection_subgraph(sdfg, state, src_node, dst_node, ast_memlet)
        return

    if data is not None:
        raise NotImplementedError('This should never happen')

    edge_memlet = Memlet(ast_memlet.dataname, ast_memlet.num_accesses,
                         ast_memlet.subset, ast_memlet.veclen, wcr,
                         wcr_identity)
    state.add_edge(src_node, src_conn, dst_node, dst_conn, edge_memlet)
def make_sdfg():
    """ Builds the 'vol_propagation' SDFG.
        The SDFG has the symbols 'N' and 'M' and a single state 'main', in
        which the array 'A_in' and the stream 'bound_in' are connected to the
        inputs of a nested SDFG (from `make_nested_sdfg`), and the nested
        SDFG's output is connected to the stream 'out_stream'.
        :return: The constructed SDFG.
    """
    sdfg = dace.SDFG('vol_propagation')

    for symbol_name in ('N', 'M'):
        sdfg.add_symbol(symbol_name, dace.int32)

    state = sdfg.add_state('main')

    a_in = state.add_array('A_in', [N], dace.int32,
                           storage=StorageType.FPGA_Global)

    # Both streams share the same options.
    stream_options = dict(transient=True, storage=StorageType.FPGA_Local)
    bound_pipe = state.add_stream('bound_in', dace.int32, **stream_options)
    out_stream = state.add_stream('out_stream', dace.int32, **stream_options)

    nest = state.add_nested_sdfg(make_nested_sdfg(), sdfg,
                                 {'IN_a', 'IN_bound'}, {'OUT_stream'})

    # Inputs of the nested SDFG.
    state.add_memlet_path(a_in,
                          nest,
                          dst_conn='IN_a',
                          memlet=Memlet.simple(a_in, '0:N'))
    state.add_memlet_path(bound_pipe,
                          nest,
                          dst_conn='IN_bound',
                          memlet=Memlet.simple(bound_pipe, '0',
                                               num_accesses=-1))

    # Output of the nested SDFG.
    state.add_memlet_path(nest,
                          out_stream,
                          src_conn='OUT_stream',
                          memlet=Memlet.simple(out_stream, '0',
                                               num_accesses=-1))

    return sdfg