Ejemplo n.º 1
0
def make_read_col():
    """Build the ``spmv_read_col`` SDFG.

    The loop-body state returned by ``make_iteration_space`` is populated with
    one tasklet that indexes ``A_col_mem`` with ``row_begin + c`` and writes
    the value to the ``col_pipe`` stream.

    :return: the constructed SDFG.
    """
    sdfg = SDFG("spmv_read_col")

    # Only the body state receives dataflow here; the pre/post states that are
    # returned alongside it are left untouched.
    _, body, _ = make_iteration_space(sdfg)

    col_array = body.add_array(
        "A_col_mem",
        (nnz, ),
        itype,
        storage=StorageType.FPGA_Global,
    )
    out_pipe = body.add_stream(
        "col_pipe",
        itype,
        storage=StorageType.FPGA_Local,
    )

    reader = body.add_tasklet(
        "read_col",
        {"col_in"},
        {"col_out"},
        "col_out = col_in[row_begin + c]",
    )

    # The tasklet sees the whole array and picks the element itself.
    array_to_reader = Memlet.simple(col_array, "0:nnz")
    body.add_memlet_path(col_array,
                         reader,
                         dst_conn="col_in",
                         memlet=array_to_reader)

    reader_to_pipe = Memlet.simple(out_pipe, "0")
    body.add_memlet_path(reader,
                         out_pipe,
                         src_conn="col_out",
                         memlet=reader_to_pipe)

    return sdfg
Ejemplo n.º 2
0
def make_sdfg(dtype):
    """Build the single-state ``mpi_reduce`` SDFG.

    Non-transient arrays ``inbuf`` and ``outbuf`` (length ``n``) and the
    one-element int32 array ``root`` are wired to an MPI ``Reduce`` library
    node through its ``_inbuffer``, ``_root`` and ``_outbuffer`` connectors.

    :param dtype: element type of ``inbuf`` and ``outbuf``.
    :return: the constructed SDFG.
    """
    n = dace.symbol("n")

    sdfg = dace.SDFG("mpi_reduce")
    state = sdfg.add_state("dataflow")

    # Register all data containers before creating any access node.
    for name, shape, elem_type in (("inbuf", [n], dtype),
                                   ("outbuf", [n], dtype),
                                   ("root", [1], dace.dtypes.int32)):
        sdfg.add_array(name, shape, elem_type, transient=False)

    inbuf = state.add_access("inbuf")
    outbuf = state.add_access("outbuf")
    root = state.add_access("root")
    reduce_node = mpi.nodes.reduce.Reduce("reduce")

    # Inputs of the library node
    send_memlet = Memlet.simple(inbuf, "0:n", num_accesses=n)
    state.add_memlet_path(inbuf,
                          reduce_node,
                          dst_conn="_inbuffer",
                          memlet=send_memlet)
    root_memlet = Memlet.simple(root, "0:1", num_accesses=1)
    state.add_memlet_path(root,
                          reduce_node,
                          dst_conn="_root",
                          memlet=root_memlet)

    # Output of the library node
    recv_memlet = Memlet.simple(outbuf, "0:n", num_accesses=n)
    state.add_memlet_path(reduce_node,
                          outbuf,
                          src_conn="_outbuffer",
                          memlet=recv_memlet)

    return sdfg
Ejemplo n.º 3
0
def test_nested_symbol_type():
    """Compile and run an SDFG whose nested SDFG reads a float32 symbol.

    The inner SDFG has one tasklet that writes symbol ``s`` to its ``output``
    array. The outer SDFG declares the same symbol, nests the inner SDFG and
    forwards ``output`` to its own ``data`` array. After running with
    ``s = 1.5`` the single element of ``data`` must equal ``1.5``.
    """
    # --- Inner SDFG: output[0] = s
    inner_sdfg = dace.SDFG("test_nested_symbol_type")
    inner_state = inner_sdfg.add_state("test_state")
    inner_sdfg.add_symbol("s", dace.float32)
    inner_sdfg.add_array('output', shape=[1], dtype=dace.float32)

    inner_write = inner_state.add_write('output')
    symbol_tasklet = inner_state.add_tasklet('bugs', [], ['out'], 'out = s')
    inner_state.add_memlet_path(
        symbol_tasklet,
        inner_write,
        src_conn='out',
        memlet=Memlet.simple(inner_write.data, "0"),
    )

    # --- Outer SDFG: nests the inner one and exposes its result as `data`
    outer_sdfg = dace.SDFG("nested_symbol_type")
    outer_state = outer_sdfg.add_state("outer_state")
    outer_sdfg.add_symbol("s", dace.float32)
    outer_sdfg.add_array('data', shape=[1], dtype=dace.float32)

    outer_write = outer_state.add_write('data')
    nested_node = outer_state.add_nested_sdfg(inner_sdfg, outer_sdfg, {},
                                              {'output'})
    outer_state.add_memlet_path(
        nested_node,
        outer_write,
        src_conn='output',
        memlet=Memlet.simple(outer_write.data, "0"),
    )

    # --- Execute and verify
    program = outer_sdfg.compile()
    result = np.zeros(1, dtype=np.float32)
    program(data=result, s=np.float32(1.5))

    print("res:", result[0])
    assert result[0] == np.float32(1.5)
Ejemplo n.º 4
0
def _reduce(sdfg: SDFG,
            state: SDFGState,
            redfunction: Callable[[Any, Any], Any],
            in_array: str,
            out_array=None,
            axis=None,
            identity=None):
    """ Adds a reduce node between ``in_array`` and an output array.

        :param sdfg: SDFG holding the data descriptors.
        :param state: state that receives the read, reduce and write nodes.
        :param redfunction: reduction function, forwarded to
                            ``state.add_reduce``.
        :param in_array: input array expression; it is parsed with
                         ``ast.parse`` to obtain the input subset.
        :param out_array: output array expression. If None, a temporary
                          transient is created to hold the result.
        :param axis: a single axis, a tuple/list of axes, or None. Each entry
                     is converted with ``pystr_to_symbolic``.
        :param identity: forwarded to ``state.add_reduce``.
        :return: the name of the new transient when ``out_array`` is None,
                 otherwise an empty list.
    """
    # Normalize the axes to a tuple of symbolic expressions (or keep None)
    if axis is not None:
        if not isinstance(axis, (tuple, list)):
            axis = (axis, )
        axis = tuple(pystr_to_symbolic(a) for a in axis)

    # The input memlet is computed the same way regardless of the output
    inarr = in_array
    input_subset = parse_memlet_subset(sdfg.arrays[inarr],
                                       ast.parse(in_array).body[0].value, {})
    input_memlet = Memlet.simple(inarr, input_subset)

    creates_output = out_array is None
    if creates_output:
        # Derive the result shape: a single element for a full reduction,
        # otherwise the input subset with the reduced axes removed.
        if axis is None:
            output_shape = [1]
        else:
            remaining = copy.deepcopy(input_subset)
            remaining.pop(axis)
            output_shape = remaining.size()
        in_desc = sdfg.arrays[inarr]
        outarr, out_desc = sdfg.add_temp_transient(output_shape,
                                                   in_desc.dtype,
                                                   in_desc.storage)
        output_memlet = Memlet.from_array(outarr, out_desc)
    else:
        outarr = out_array
        output_subset = parse_memlet_subset(
            sdfg.arrays[outarr],
            ast.parse(out_array).body[0].value, {})
        output_memlet = Memlet.simple(outarr, output_subset)

    # Create reduce subgraph: read -> reduce -> write
    read_node = state.add_read(inarr)
    reduce_node = state.add_reduce(redfunction, axis, identity)
    write_node = state.add_write(outarr)
    state.add_nedge(read_node, reduce_node, input_memlet)
    state.add_nedge(reduce_node, write_node, output_memlet)

    return outarr if creates_output else []
Ejemplo n.º 5
0
def make_read_x():
    """Build the ``spmv_read_x`` SDFG.

    In the loop-body state returned by ``make_iteration_space``, a tasklet
    pops an index from the ``col_pipe`` stream, uses it to index ``x_mem`` and
    writes the selected value to the ``compute_pipe`` stream.

    :return: the constructed SDFG.
    """
    sdfg = SDFG("spmv_read_x")

    # Only the body state is populated by this builder.
    _, body, _ = make_iteration_space(sdfg)

    x_array = body.add_array(
        "x_mem",
        (W, ),
        dtype,
        storage=StorageType.FPGA_Global,
    )
    index_pipe = body.add_stream(
        "col_pipe",
        itype,
        storage=StorageType.FPGA_Local,
    )
    value_pipe = body.add_stream(
        "compute_pipe",
        dtype,
        storage=StorageType.FPGA_Local,
    )

    gather = body.add_tasklet(
        "read_x",
        {"x_in", "col_in"},
        {"x_out"},
        "x_out = x_in[col_in]",
    )

    # Inputs: the full x vector plus one index per invocation.
    body.add_memlet_path(x_array,
                         gather,
                         dst_conn="x_in",
                         memlet=Memlet.simple(x_array, "0:W"))
    body.add_memlet_path(index_pipe,
                         gather,
                         dst_conn="col_in",
                         memlet=Memlet.simple(index_pipe, "0"))

    # Output: the gathered value.
    body.add_memlet_path(gather,
                         value_pipe,
                         src_conn="x_out",
                         memlet=Memlet.simple(value_pipe, "0"))

    return sdfg
Ejemplo n.º 6
0
def test():
    """Run an SDFG in which two tasklets are chained inside one map.

    The first tasklet computes ``5*a`` and the second ``2*c``, so every output
    element is expected to be ten times the corresponding input element.
    """
    print('SDFG consecutive tasklet test')

    # Symbolic size, bound to 20 for this run
    N = dp.symbol('N')
    N.set(20)

    # Host buffers (named to avoid shadowing the `input` builtin)
    in_data = dp.ndarray([N], dp.int32)
    out_data = dp.ndarray([N], dp.int32)
    in_data[:] = dp.int32(5)
    out_data[:] = dp.int32(0)

    # Build the graph
    sdfg = SDFG('ctasklet')
    state = sdfg.add_state()
    node_a = state.add_array('A', [N], dp.int32)
    node_b = state.add_array('B', [N], dp.int32)

    entry, exit_ = state.add_map('mymap', {'i': '0:N'})
    times_five = state.add_tasklet('mytasklet', {'a'}, {'b'}, 'b = 5*a')
    state.add_edge(entry, None, times_five, 'a', Memlet.simple(node_a, 'i'))
    times_two = state.add_tasklet('mytasklet2', {'c'}, {'d'}, 'd = 2*c')
    # The tasklet-to-tasklet edge carries an empty memlet.
    state.add_edge(times_five, 'b', times_two, 'c', Memlet())
    state.add_edge(times_two, 'd', exit_, None, Memlet.simple(node_b, 'i'))

    # Connect the map scope to the arrays outside of it
    state.add_edge(node_a, None, entry, None, Memlet.simple(node_a, '0:N'))
    state.add_edge(exit_, None, node_b, None, Memlet.simple(node_b, '0:N'))

    sdfg(A=in_data, B=out_data, N=N)

    expected = 10 * in_data
    diff = np.linalg.norm(expected - out_data) / N.get()
    print("Difference:", diff)
    assert diff <= 1e-5
Ejemplo n.º 7
0
def test():
    """Run a minimal SDFG that multiplies every element of ``A`` by five.

    The result is written to ``B`` and compared against ``5 * input``.
    """
    # Symbolic size, bound to 20 for this run
    N = dp.symbol('N')
    N.set(20)

    # Host buffers (named to avoid shadowing the `input` builtin)
    in_data = dp.ndarray([N], dp.int32)
    out_data = dp.ndarray([N], dp.int32)
    in_data[:] = dp.int32(5)
    out_data[:] = dp.int32(0)

    # Build the graph.
    # NOTE: The array names 'A' and 'B' are not reserved; the Python variables
    # deliberately differ from them to show that variable name != array name.
    sdfg = SDFG('mysdfg')
    state = sdfg.add_state()
    node_a = state.add_array('A', [N], dp.int32)
    node_b = state.add_array('B', [N], dp.int32)

    # Convenience API: creates the map and the tasklet in one call.
    # The explicit equivalent would be:
    #   map_entry, map_exit = state.add_map('mymap', dict(i='0:N'))
    #   tasklet = state.add_tasklet('mytasklet', {'a'}, {'b'}, 'b = 5*a')
    #   state.add_edge(map_entry, None, tasklet, 'a', Memlet.simple(A_, 'i'))
    #   state.add_edge(tasklet, 'b', map_exit, None, Memlet.simple(B_, 'i'))
    _, entry, exit_ = state.add_mapped_tasklet(
        'mytasklet',
        {'i': '0:N'},
        {'a': Memlet.simple(node_a, 'i')},
        'b = 5*a',
        {'b': Memlet.simple(node_b, 'i')},
    )

    # Connect the map scope to the arrays outside of it
    state.add_edge(node_a, None, entry, None, Memlet.simple(node_a, '0:N'))
    state.add_edge(exit_, None, node_b, None, Memlet.simple(node_b, '0:N'))

    sdfg(A=in_data, B=out_data, N=N)

    expected = 5 * in_data
    diff = np.linalg.norm(expected - out_data) / N.get()
    print("Difference:", diff)
    assert diff <= 1e-5
Ejemplo n.º 8
0
def test_dynamic_sdfg_with_math_functions():
    """Run an SDFG whose tasklet calls ``math.exp`` on each input element.

    The input memlet is indexed with ``i % N``; the output is compared against
    ``np.exp`` of the input.
    """
    # Symbolic size, bound to 20 for this run
    N = dp.symbol('N')
    N.set(20)

    # Host buffers (named to avoid shadowing the `input` builtin)
    in_data = np.random.rand(N.get()).astype(np.float32)
    out_data = dp.ndarray([N], dp.float32)
    out_data[:] = dp.float32(0)

    # Build the graph
    sdfg = SDFG('mymodexp')
    state = sdfg.add_state()
    node_a = state.add_array('A', [N], dp.float32)
    node_b = state.add_array('B', [N], dp.float32)

    # Map + tasklet in one call
    _, entry, exit_ = state.add_mapped_tasklet(
        'mytasklet',
        {'i': '0:N'},
        {'a': Memlet.simple(node_a, 'i % N')},
        'b = math.exp(a)',
        {'b': Memlet.simple(node_b, 'i')},
    )

    # Connect the map scope to the arrays outside of it
    state.add_edge(node_a, None, entry, None, Memlet.simple(node_a, '0:N'))
    state.add_edge(exit_, None, node_b, None, Memlet.simple(node_b, '0:N'))

    sdfg(A=in_data, B=out_data, N=N)

    expected = np.exp(in_data)
    diff = np.linalg.norm(expected - out_data) / N.get()
    print("Difference:", diff)
    assert diff <= 1e-5
Ejemplo n.º 9
0
def make_sdfg(dtype):
    """Build the single-state ``mpi_bcast`` SDFG.

    The non-transient array ``x`` (length ``n``) is both the input and the
    output of an MPI ``Bcast`` library node, through two separate access
    nodes. The one-element int32 array ``root`` feeds the ``_root`` connector.

    :param dtype: element type of ``x``.
    :return: the constructed SDFG.
    """
    n = dace.symbol("n")

    sdfg = dace.SDFG("mpi_bcast")
    state = sdfg.add_state("dataflow")

    sdfg.add_array("x", [n], dtype, transient=False)
    sdfg.add_array("root", [1], dace.dtypes.int32, transient=False)
    # Two access nodes for the same container: one read side, one write side.
    x = state.add_access("x")
    xout = state.add_access("x")
    root = state.add_access("root")
    bcast_node = mpi.nodes.bcast.Bcast("bcast")

    state.add_memlet_path(x,
                          bcast_node,
                          dst_conn="_inbuffer",
                          memlet=Memlet.simple(x, "0:n", num_accesses=n))
    state.add_memlet_path(root,
                          bcast_node,
                          dst_conn="_root",
                          memlet=Memlet.simple(root, "0:1", num_accesses=1))
    # The output memlet spans all n elements, so its access count is n as
    # well (it was previously declared as 1, disagreeing with the input side).
    state.add_memlet_path(bcast_node,
                          xout,
                          src_conn="_outbuffer",
                          memlet=Memlet.simple(xout, "0:n", num_accesses=n))

    return sdfg
Ejemplo n.º 10
0
def make_nested_vecAdd_sdfg(sdfg_name: str, dtype=dace.float32):
    '''
    Creates a vector-addition SDFG whose computation is delegated to a nested
    SDFG built by ``make_vecAdd_sdfg``.
    :param sdfg_name: name of the outer SDFG; the nested one is named
                      ``sdfg_name + "_nested"``
    :param dtype: element type of the ``x``, ``y`` and ``z`` arrays
    :return: the outer SDFG
    '''
    n = dace.symbol("size")
    parent_sdfg = dace.SDFG(sdfg_name)
    parent_state = parent_sdfg.add_state("vecAdd_parent")

    def whole_vector(node):
        # Memlet covering the entire vector behind `node`
        return Memlet.simple(node, "0:size", num_accesses=n)

    # ---------- ----------
    # DATA CONTAINERS AND ACCESS NODES
    # ---------- ----------

    for array_name in ("x", "y", "z"):
        parent_sdfg.add_array(array_name, [n], dtype=dtype)

    x_in = parent_state.add_read("x")
    y_in = parent_state.add_read("y")
    z_out = parent_state.add_write("z")

    # ---------- ----------
    # COMPUTE
    # ---------- ----------

    # Build the SDFG that performs the addition and nest it in the parent
    inner_sdfg = make_vecAdd_sdfg(sdfg_name + "_nested", dtype)
    nested_node = parent_state.add_nested_sdfg(inner_sdfg, parent_sdfg,
                                               {"x", "y"}, {"z"})

    # Inputs
    parent_state.add_memlet_path(x_in,
                                 nested_node,
                                 dst_conn="x",
                                 memlet=whole_vector(x_in))
    parent_state.add_memlet_path(y_in,
                                 nested_node,
                                 dst_conn="y",
                                 memlet=whole_vector(y_in))
    # Output
    parent_state.add_memlet_path(nested_node,
                                 z_out,
                                 src_conn="z",
                                 memlet=whole_vector(z_out))

    return parent_sdfg
Ejemplo n.º 11
0
def _gather(pv: 'ProgramVisitor',
            sdfg: SDFG,
            state: SDFGState,
            in_buffer: str,
            out_buffer: str,
            root: Union[str, sp.Expr, Number] = 0):
    """ Adds an MPI ``Gather`` library node to ``state``.

        :param pv: program visitor, forwarded to ``_define_local_scalar``.
        :param sdfg: SDFG holding the data descriptors.
        :param state: state that receives the new nodes and edges.
        :param in_buffer: name of the array connected to ``_inbuffer``.
        :param out_buffer: name of the array connected to ``_outbuffer``.
        :param root: name of an existing array, or a value that is assigned
                     to a new local int32 scalar by a small tasklet.
        :return: None.
    """
    from dace.libraries.mpi.nodes.gather import Gather

    gather_node = Gather('_Gather_')
    in_desc = sdfg.arrays[in_buffer]
    out_desc = sdfg.arrays[out_buffer]
    in_node = state.add_read(in_buffer)
    out_node = state.add_write(out_buffer)

    root_is_array = isinstance(root, str) and root in sdfg.arrays.keys()
    if root_is_array:
        root_node = state.add_read(root)
    else:
        # Materialize the root value in a local scalar that uses the same
        # storage as the input buffer.
        root_name = _define_local_scalar(pv, sdfg, state, dace.int32,
                                         in_desc.storage)
        root_node = state.add_access(root_name)
        set_root = state.add_tasklet('_set_root_', {}, {'__out'},
                                     '__out = {}'.format(root))
        state.add_edge(set_root, '__out', root_node, None,
                       Memlet.simple(root_name, '0'))

    # Wire buffers and root to the library node
    in_memlet = Memlet.from_array(in_buffer, in_desc)
    state.add_edge(in_node, None, gather_node, '_inbuffer', in_memlet)
    root_memlet = Memlet.simple(root_node.data, '0')
    state.add_edge(root_node, None, gather_node, '_root', root_memlet)
    out_memlet = Memlet.from_array(out_buffer, out_desc)
    state.add_edge(gather_node, '_outbuffer', out_node, None, out_memlet)

    return None
Ejemplo n.º 12
0
def _Reduce(pv: 'ProgramVisitor',
            sdfg: SDFG,
            state: SDFGState,
            buffer: str,
            op: str,
            root: Union[str, sp.Expr, Number] = 0,
            grid: str = None):
    """ Adds an MPI ``Reduce`` library node that reads and writes ``buffer``.

        :param pv: program visitor, forwarded to ``_define_local_scalar``.
        :param sdfg: SDFG holding the data descriptors.
        :param state: state that receives the new nodes and edges.
        :param buffer: name of the array connected to both ``_inbuffer`` and
                       ``_outbuffer``.
        :param op: forwarded to the ``Reduce`` constructor.
        :param root: name of an existing array, or a value that is assigned
                     to a new local int32 scalar by a small tasklet.
        :param grid: forwarded to the ``Reduce`` constructor.
        :return: None.
    """
    from dace.libraries.mpi.nodes.reduce import Reduce

    reduce_node = Reduce('_Reduce_', op, grid)
    desc = sdfg.arrays[buffer]
    # Separate read and write access nodes for the same container
    read_node = state.add_read(buffer)
    write_node = state.add_write(buffer)

    root_is_array = isinstance(root, str) and root in sdfg.arrays.keys()
    if root_is_array:
        root_node = state.add_read(root)
    else:
        # Materialize the root value in a local scalar that uses the same
        # storage as the buffer.
        root_name = _define_local_scalar(pv, sdfg, state, dace.int32,
                                         desc.storage)
        root_node = state.add_access(root_name)
        set_root = state.add_tasklet('_set_root_', {}, {'__out'},
                                     '__out = {}'.format(root))
        state.add_edge(set_root, '__out', root_node, None,
                       Memlet.simple(root_name, '0'))

    # Wire buffer and root to the library node
    in_memlet = Memlet.from_array(buffer, desc)
    state.add_edge(read_node, None, reduce_node, '_inbuffer', in_memlet)
    root_memlet = Memlet.simple(root_node.data, '0')
    state.add_edge(root_node, None, reduce_node, '_root', root_memlet)
    out_memlet = Memlet.from_array(buffer, desc)
    state.add_edge(reduce_node, '_outbuffer', write_node, None, out_memlet)

    return None
Ejemplo n.º 13
0
def _assignop(sdfg: SDFG, state: SDFGState, op1: str, opcode: str, opname: str):
    """ Implements a general element-wise array assignment operator.

        A new transient with the shape, dtype and storage of ``op1`` is
        created, and a mapped tasklet copies ``op1`` into it element by
        element. When ``opcode`` is non-empty the write uses a write-conflict
        resolution of the form ``lambda x, y: x <opcode> y``.

        :param sdfg: SDFG holding the data descriptors.
        :param state: state that receives the mapped tasklet.
        :param op1: name of the source array.
        :param opcode: operator string used in the WCR lambda, or a falsy
                       value for a plain write.
        :param opname: used to name the tasklet (``_<opname>_``).
        :return: the name of the new transient.
    """
    src_desc = sdfg.arrays[op1]
    name, _ = sdfg.add_temp_transient(src_desc.shape, src_desc.dtype,
                                      src_desc.storage)

    # One map parameter per dimension: __i0, __i1, ...
    params = ['__i%d' % dim for dim in range(len(src_desc.shape))]
    index = ','.join(params)

    wcr_args = {}
    if opcode:
        wcr_args['wcr_str'] = 'lambda x, y: x %s y' % opcode
    write_memlet = Memlet.simple(name, index, **wcr_args)

    map_ranges = {
        param: '0:%s' % extent
        for param, extent in zip(params, src_desc.shape)
    }
    read_memlet = Memlet.simple(op1, index)

    state.add_mapped_tasklet("_%s_" % opname,
                             map_ranges, {'__in1': read_memlet},
                             '__out = __in1', {'__out': write_memlet},
                             external_edges=True)
    return name
Ejemplo n.º 14
0
def nccl_send(pv: 'ProgramVisitor',
              sdfg: SDFG,
              state: SDFGState,
              in_buffer: str,
              peer: symbolic.SymbolicType = 0,
              group_handle: str = None):
    """ Adds an NCCL ``Send`` library node that reads from ``in_buffer``.

        :param pv: program visitor, forwarded to ``_define_local_scalar``.
        :param sdfg: SDFG holding the data descriptors.
        :param state: state that receives the new nodes and edges.
        :param in_buffer: name of the array to send, or a ``(name, range)``
                          tuple to send only a sub-range.
        :param peer: forwarded to the ``Send`` constructor.
        :param group_handle: optional name of a group-handle container. If it
                             names an existing array, the handle is both read
                             and written by the node; if it is any other
                             string, a new local int32 scalar is created and
                             only written.
        :return: an empty list.
    """
    inputs = {"_inbuffer"}
    outputs = set()

    uses_group_handle = isinstance(group_handle, str)
    if uses_group_handle:
        gh_start = False
        if group_handle in sdfg.arrays.keys():
            # Existing handle: the node reads it and writes it back.
            gh_name = group_handle
            gh_out = state.add_access(gh_name)
            gh_in = state.add_access(gh_name)
            inputs.add("_group_handle")
        else:
            # No such array yet: this node starts the group.
            gh_start = True
            gh_name = _define_local_scalar(pv, sdfg, state, dace.int32,
                                           dtypes.StorageType.GPU_Global)
            gh_out = state.add_access(gh_name)
        outputs.add("_group_handle")

    libnode = Send(inputs=inputs, outputs=outputs, peer=peer)

    if uses_group_handle:
        gh_memlet = Memlet.simple(gh_name, '0')
        if not gh_start:
            state.add_edge(gh_in, None, libnode, "_group_handle", gh_memlet)
        state.add_edge(libnode, "_group_handle", gh_out, None, gh_memlet)

    in_range = None
    if isinstance(in_buffer, tuple):
        in_name, in_range = in_buffer
    else:
        in_name = in_buffer

    desc = sdfg.arrays[in_name]
    # Type the data connector as a pointer to the buffer's element type.
    # The connector declared above is '_inbuffer'; the previous comparison
    # against '_buffer' could not match any connector declared here, which
    # left this retyping without effect.
    libnode.in_connectors = {
        c: (dtypes.pointer(desc.dtype) if c == '_inbuffer' else t)
        for c, t in libnode.in_connectors.items()
    }
    in_node = state.add_read(in_name)

    if in_range:
        buf_mem = Memlet.simple(in_name, in_range)
    else:
        buf_mem = Memlet.from_array(in_name, desc)

    state.add_edge(in_node, None, libnode, '_inbuffer', buf_mem)

    return []
Ejemplo n.º 15
0
def test():
    """Check that specializing symbols removes 'Dynamic' from generated code.

    An SDFG copies the interior rows of ``A`` into a transient, applies
    ``math.exp`` element-wise and writes to ``B``. The generated code must
    contain the string ``'Dynamic'`` before ``specialize`` is called and must
    not contain it afterwards. Finally the specialized SDFG is compiled, run
    and compared against NumPy.
    """
    print('Constant specialization test')

    # Symbolic sizes, bound to concrete values for this run
    N = dp.symbol('N')
    M = dp.symbol('M')
    N.set(20)
    M.set(30)

    irange = '1:N-1'
    jrange = '0:M'
    fullrange = '1:N-1,0:M'

    # Host buffers (named to avoid shadowing the `input` builtin)
    in_data = np.random.rand(N.get(), M.get()).astype(np.float32)
    out_data = dp.ndarray([N, M], dtype=dp.float32)
    out_data[:] = dp.float32(0)

    ##########################################################################
    # Graph construction
    spec_sdfg = SDFG('spectest')
    state = spec_sdfg.add_state()
    node_a = state.add_array('A', [N, M], dp.float32)
    node_tmp = state.add_transient('At', [N - 2, M], dp.float32)
    node_b = state.add_array('B', [N, M], dp.float32)

    state.add_edge(node_a, None, node_tmp, None,
                   Memlet.simple(node_a, fullrange))
    _, entry, exit_ = state.add_mapped_tasklet(
        'compute',
        {'i': irange, 'j': jrange},
        {'a': Memlet.simple(node_tmp, 'i-1,j')},
        'b = math.exp(a)',
        {'b': Memlet.simple(node_b, 'i,j')},
    )
    state.add_edge(node_tmp, None, entry, None,
                   Memlet.simple(node_tmp, fullrange))
    state.add_edge(exit_, None, node_b, None, Memlet.simple(node_b, fullrange))

    spec_sdfg.fill_scope_connectors()
    dp.propagate_memlets_sdfg(spec_sdfg)
    spec_sdfg.validate()
    ##########################################################################

    # Before specialization the generated code mentions 'Dynamic'
    code_nonspec = spec_sdfg.generate_code()
    assert 'Dynamic' in code_nonspec[0].code

    # After fixing N and M it no longer does
    spec_sdfg.specialize(dict(N=N, M=M))
    code_spec = spec_sdfg.generate_code()
    assert 'Dynamic' not in code_spec[0].code

    # Functional check against NumPy on the interior rows
    func = spec_sdfg.compile()
    func(A=in_data, B=out_data, N=N, M=M)

    expected = np.exp(in_data[1:(N.get() - 1), 0:M.get()])
    diff = np.linalg.norm(expected - out_data[1:-1, :]) / N.get()
    print("Difference:", diff)
    assert diff <= 1e-5
Ejemplo n.º 16
0
def make_nested_sdfg():
    """Build the ``vol_propagation_nested`` SDFG.

    The SDFG has four states forming a loop over ``i`` from 0 while
    ``i < loop_bound``:

    * ``assign`` moves a value from the ``IN_bound`` stream into the
      transient scalar ``loop_bound``;
    * ``guard`` branches on the loop condition;
    * ``for`` reads ``IN_a[0]`` and writes it to the ``OUT_stream`` stream;
    * ``endfor`` is the exit state.

    :return: the constructed SDFG.
    """
    sdfg = dace.SDFG('vol_propagation_nested')

    def python_condition(expression):
        # Interstate edge guarded by a Python-language condition
        return InterstateEdge(condition=CodeProperty.from_string(
            expression, language=Language.Python))

    # --- States and control flow
    init_state = sdfg.add_state('assign')
    guard_state = sdfg.add_state('guard')
    body_state = sdfg.add_state('for')
    exit_state = sdfg.add_state('endfor')

    sdfg.add_edge(init_state, guard_state,
                  InterstateEdge(assignments={'i': '0'}))
    sdfg.add_edge(guard_state, body_state,
                  python_condition('i < loop_bound'))
    sdfg.add_edge(body_state, guard_state,
                  InterstateEdge(assignments={'i': 'i+1'}))
    sdfg.add_edge(guard_state, exit_state,
                  python_condition('not (i < loop_bound)'))

    # --- Initial state: read the loop bound from its stream
    bound_stream = init_state.add_stream('IN_bound',
                                         dace.int32,
                                         storage=StorageType.FPGA_Local)
    bound_scalar = init_state.add_scalar('loop_bound',
                                         dace.int32,
                                         transient=True,
                                         storage=StorageType.FPGA_Registers)
    init_state.add_memlet_path(bound_stream,
                               bound_scalar,
                               memlet=Memlet.simple(bound_scalar, '0'))

    # --- Loop body: forward IN_a[0] to the output stream
    input_array = body_state.add_array('IN_a', [N],
                                       dace.int32,
                                       storage=StorageType.FPGA_Global)
    output_stream = body_state.add_stream('OUT_stream',
                                          dace.int32,
                                          storage=StorageType.FPGA_Local)
    forward = body_state.add_tasklet('compute', {'_IN_a'}, {'_OUT_stream'},
                                     '_OUT_stream = _IN_a[0]')
    body_state.add_memlet_path(input_array,
                               forward,
                               dst_conn='_IN_a',
                               memlet=Memlet.simple(input_array, '0:N'))
    body_state.add_memlet_path(forward,
                               output_stream,
                               src_conn='_OUT_stream',
                               memlet=Memlet.simple(output_stream, '0'))

    return sdfg
Ejemplo n.º 17
0
def nccl_recv(pv: 'ProgramVisitor',
              sdfg: SDFG,
              state: SDFGState,
              out_buffer: str,
              peer: symbolic.SymbolicType = 0,
              group_handle: str = None):
    """ Adds an NCCL ``Recv`` library node that writes to ``out_buffer``.

        :param pv: program visitor, forwarded to ``_define_local_scalar``.
        :param sdfg: SDFG holding the data descriptors.
        :param state: state that receives the new nodes and edges.
        :param out_buffer: name of an existing array to receive into, or a
                           ``(name, range)`` tuple to receive into a
                           sub-range.
        :param peer: forwarded to the ``Recv`` constructor.
        :param group_handle: optional name of a group-handle container. If it
                             names an existing array, the handle is both read
                             and written by the node; if it is any other
                             string, a new local int32 scalar is created and
                             only written.
        :return: an empty list.
        :raises ValueError: if ``out_buffer`` is neither a tuple nor the name
                            of an array in ``sdfg``.
    """
    inputs = set()
    outputs = {"_outbuffer"}

    uses_group_handle = isinstance(group_handle, str)
    if uses_group_handle:
        gh_start = False
        if group_handle in sdfg.arrays.keys():
            # Existing handle: the node reads it and writes it back.
            gh_name = group_handle
            gh_out = state.add_access(gh_name)
            gh_in = state.add_access(gh_name)
            inputs.add("_group_handle")
        else:
            # No such array yet: this node starts the group.
            gh_start = True
            gh_name = _define_local_scalar(pv, sdfg, state, dace.int32,
                                           dtypes.StorageType.GPU_Global)
            gh_out = state.add_access(gh_name)
        outputs.add("_group_handle")

    libnode = Recv(inputs=inputs, outputs=outputs, peer=peer)

    if uses_group_handle:
        gh_memlet = Memlet.simple(gh_name, '0')
        if not gh_start:
            state.add_edge(gh_in, None, libnode, "_group_handle", gh_memlet)
        state.add_edge(libnode, "_group_handle", gh_out, None, gh_memlet)

    out_range = None
    if isinstance(out_buffer, tuple):
        out_name, out_range = out_buffer
        out_node = state.add_write(out_name)
    elif isinstance(out_buffer, str) and out_buffer in sdfg.arrays.keys():
        out_name = out_buffer
        out_node = state.add_write(out_name)
    else:
        raise ValueError(
            "NCCL_Recv out_buffer must be an array, or a an array range tuple.")

    if out_range:
        out_mem = Memlet.simple(out_name, out_range)
    else:
        # Without an explicit range the whole array is the destination, as in
        # nccl_send. The previous memlet addressed only element '0', which
        # describes a single element and does not fit multi-dimensional
        # arrays.
        out_mem = Memlet.from_array(out_name, sdfg.arrays[out_name])

    state.add_edge(libnode, '_outbuffer', out_node, None, out_mem)

    return []
Ejemplo n.º 18
0
def make_compute_sdfg():
    """Build the ``spmv_compute`` SDFG.

    In the loop-body state, a nested SDFG (from ``make_compute_nested_sdfg``)
    consumes one element from each of the ``a_pipe`` and ``x_pipe`` streams
    together with the transient scalar ``b_buffer`` and writes ``b_buffer``
    back. In the post state, ``b_buffer`` is pushed to the ``b_pipe`` stream.

    :return: the constructed SDFG.
    """
    sdfg = SDFG("spmv_compute")

    # The pre state is not populated by this builder.
    _, body, post_state = make_iteration_space(sdfg)

    # --- Loop body
    a_stream = body.add_stream("a_pipe", dtype, storage=StorageType.FPGA_Local)
    x_stream = body.add_stream("x_pipe", dtype, storage=StorageType.FPGA_Local)
    # b_buffer is accessed through separate read and write nodes.
    accum_read = body.add_scalar("b_buffer",
                                 dtype,
                                 transient=True,
                                 storage=StorageType.FPGA_Registers)
    accum_write = body.add_scalar("b_buffer",
                                  dtype,
                                  transient=True,
                                  storage=StorageType.FPGA_Registers)

    inner_sdfg = make_compute_nested_sdfg()
    compute = body.add_nested_sdfg(inner_sdfg, sdfg,
                                   {"a_in", "x_in", "b_in"}, {"b_out"})

    # Inputs, each a single element of its source
    for source, connector in ((a_stream, "a_in"), (accum_read, "b_in"),
                              (x_stream, "x_in")):
        body.add_memlet_path(source,
                             compute,
                             dst_conn=connector,
                             memlet=Memlet.simple(source, "0"))
    # Output
    body.add_memlet_path(compute,
                         accum_write,
                         src_conn="b_out",
                         memlet=Memlet.simple(accum_write, "0"))

    # --- Post state: emit the accumulated value
    accum_final = post_state.add_scalar("b_buffer",
                                        dtype,
                                        transient=True,
                                        storage=StorageType.FPGA_Registers)
    result_stream = post_state.add_stream("b_pipe",
                                          dtype,
                                          storage=StorageType.FPGA_Local)
    post_state.add_memlet_path(accum_final,
                               result_stream,
                               memlet=Memlet.simple(result_stream, "0"))

    return sdfg
Ejemplo n.º 19
0
def make_write_sdfg():
    """Build the ``spmv_write`` SDFG.

    The SDFG loops over ``h`` from 0 while ``h < H``; in each iteration the
    body state moves data from the ``b_pipe`` stream to ``b_mem[h]``.

    :return: the constructed SDFG.
    """
    sdfg = SDFG("spmv_write")

    def python_condition(expression):
        # Interstate edge guarded by a Python-language condition
        return InterstateEdge(condition=CodeProperty.from_string(
            expression, language=Language.Python))

    # --- States and control flow
    begin = sdfg.add_state("begin")
    guard = sdfg.add_state("entry")
    body = sdfg.add_state("body")
    end = sdfg.add_state("end")

    sdfg.add_edge(begin, guard, InterstateEdge(assignments={"h": "0"}))
    sdfg.add_edge(guard, body, python_condition("h < H"))
    sdfg.add_edge(guard, end, python_condition("h >= H"))
    sdfg.add_edge(body, guard, InterstateEdge(assignments={"h": "h + 1"}))

    # --- Loop body: stream -> b_mem[h]
    result_stream = body.add_stream("b_pipe",
                                    dtype,
                                    storage=StorageType.FPGA_Local)
    result_array = body.add_array("b_mem", (H, ),
                                  dtype,
                                  storage=StorageType.FPGA_Global)
    body.add_memlet_path(result_stream,
                         result_array,
                         memlet=Memlet.simple(result_array, "h"))

    return sdfg
Ejemplo n.º 20
0
def _wait(pv: 'ProgramVisitor', sdfg: SDFG, state: SDFGState, request: str):
    """ Adds an MPI ``Wait`` library node for ``request``.

        Two one-element int32 transients are created to receive the node's
        ``_stat_source`` and ``_stat_tag`` outputs.

        :param pv: program visitor (not used by this function).
        :param sdfg: SDFG holding the data descriptors.
        :param state: state that receives the new nodes and edges.
        :param request: name of the request container, or a
                        ``(name, range)`` tuple selecting part of it.
        :return: None.
    """
    from dace.libraries.mpi.nodes.wait import Wait

    wait_node = Wait('_Wait_')

    # Split an optional (name, range) pair
    if isinstance(request, tuple):
        req_name, req_range = request
    else:
        req_name, req_range = request, None

    req_desc = sdfg.arrays[req_name]
    req_node = state.add_access(req_name)

    # Destinations for the status fields
    src_name, src_desc = sdfg.add_temp_transient([1], dtypes.int32)
    src_node = state.add_write(src_name)
    tag_name, tag_desc = sdfg.add_temp_transient([1], dtypes.int32)
    tag_node = state.add_write(tag_name)

    if req_range:
        req_memlet = Memlet.simple(req_name, req_range)
    else:
        req_memlet = Memlet.from_array(req_name, req_desc)

    state.add_edge(req_node, None, wait_node, '_request', req_memlet)
    state.add_edge(wait_node, '_stat_source', src_node, None,
                   Memlet.from_array(src_name, src_desc))
    state.add_edge(wait_node, '_stat_tag', tag_node, None,
                   Memlet.from_array(tag_name, tag_desc))

    return None
Ejemplo n.º 21
0
def parse_memlet(visitor, src: MemletType, dst: MemletType,
                 defined_arrays_and_symbols: Dict[str, data.Data]):
    """ Parses a memlet statement whose one side is a local variable.

        A side counts as a local variable when it is a plain ``ast.Name``
        whose name is not in ``defined_arrays_and_symbols``; the other side
        is parsed with ``ParseMemlet``.

        :param visitor: the visitor, forwarded to ``ParseMemlet`` and used
                        for error reporting.
        :param src: AST node of the memlet source.
        :param dst: AST node of the memlet destination.
        :param defined_arrays_and_symbols: names that are already defined.
        :return: a 2-tuple of (local variable name, Memlet).
        :raises DaceSyntaxError: if both sides are local variables.
        :raises NotImplementedError: if neither side is a local variable.
    """
    def is_local_variable(node):
        return (isinstance(node, ast.Name)
                and rname(node) not in defined_arrays_and_symbols)

    srcexpr = dstexpr = localvar = None

    # Source side
    if is_local_variable(src):
        localvar = rname(src)
    else:
        srcexpr = ParseMemlet(visitor, defined_arrays_and_symbols, src)

    # Destination side
    if is_local_variable(dst):
        if localvar is not None:
            raise DaceSyntaxError(
                visitor, src,
                'Memlet source and destination cannot both be local variables')
        localvar = rname(dst)
    else:
        dstexpr = ParseMemlet(visitor, defined_arrays_and_symbols, dst)

    if srcexpr is not None and dstexpr is not None:
        # Would require creating two memlets
        raise NotImplementedError

    # Exactly one side was parsed at this point
    expr = dstexpr if srcexpr is None else srcexpr

    memlet = Memlet.simple(expr.name,
                           expr.subset,
                           num_accesses=expr.accesses,
                           wcr_str=expr.wcr)
    return localvar, memlet
Ejemplo n.º 22
0
def make_sdfg():
    """Build the single-state ``vol_propagation`` SDFG.

    The array ``A_in`` and the transient stream ``bound_in`` feed the nested
    SDFG produced by ``make_nested_sdfg``, whose ``OUT_stream`` output goes to
    the transient stream ``out_stream``. Both stream memlets are created with
    ``num_accesses=-1``.

    :return: the constructed SDFG.
    """
    sdfg = dace.SDFG('vol_propagation')
    for symbol_name in ('N', 'M'):
        sdfg.add_symbol(symbol_name, dace.int32)

    state = sdfg.add_state('main')

    # Data containers
    input_array = state.add_array('A_in', [N],
                                  dace.int32,
                                  storage=StorageType.FPGA_Global)
    bound_stream = state.add_stream('bound_in',
                                    dace.int32,
                                    transient=True,
                                    storage=StorageType.FPGA_Local)
    output_stream = state.add_stream('out_stream',
                                     dace.int32,
                                     transient=True,
                                     storage=StorageType.FPGA_Local)

    nested_node = state.add_nested_sdfg(make_nested_sdfg(), sdfg,
                                        {'IN_a', 'IN_bound'}, {'OUT_stream'})

    # Inputs
    array_memlet = Memlet.simple(input_array, '0:N')
    state.add_memlet_path(input_array,
                          nested_node,
                          dst_conn='IN_a',
                          memlet=array_memlet)
    bound_memlet = Memlet.simple(bound_stream, '0', num_accesses=-1)
    state.add_memlet_path(bound_stream,
                          nested_node,
                          dst_conn='IN_bound',
                          memlet=bound_memlet)

    # Output
    result_memlet = Memlet.simple(output_stream, '0', num_accesses=-1)
    state.add_memlet_path(nested_node,
                          output_stream,
                          src_conn='OUT_stream',
                          memlet=result_memlet)

    return sdfg
Ejemplo n.º 23
0
def _simple_call(sdfg: SDFG,
                 state: SDFGState,
                 inpname: str,
                 func: str,
                 restype: dace.typeclass = None):
    """ Implements a simple call of the form `out = func(inp)`.

    A temporary transient with the input's shape and storage is created to
    hold the result. When the product of the input's shape dimensions equals
    one, a single tasklet is connected directly between the input and output
    access nodes; otherwise a mapped tasklet applies ``func`` per element.

    :param sdfg: SDFG that owns the input array and receives the transient.
    :param state: State in which the nodes are created.
    :param inpname: Name of the input array in ``sdfg.arrays``.
    :param func: Name of the function to call inside the tasklet.
    :param restype: Result data type; defaults to the input's dtype.
    :return: Name of the transient that holds the result.
    """
    source_desc = sdfg.arrays[inpname]
    if restype is None:
        restype = source_desc.dtype
    outname, result_desc = sdfg.add_temp_transient(source_desc.shape, restype,
                                                   source_desc.storage)
    call_code = '__out = {f}(__inp)'.format(f=func)

    total = reduce(lambda x, y: x * y, source_desc.shape)
    if total == 1:
        # Single element: plain tasklet between the two access nodes.
        read_node = state.add_read(inpname)
        write_node = state.add_write(outname)
        tasklet = state.add_tasklet(func, {'__inp'}, {'__out'}, call_code)
        state.add_edge(read_node, None, tasklet, '__inp',
                       Memlet.from_array(inpname, source_desc))
        state.add_edge(tasklet, '__out', write_node, None,
                       Memlet.from_array(outname, result_desc))
        return outname

    # General case: one map parameter per dimension, element-wise access.
    index_names = ['__i%d' % dim for dim in range(len(source_desc.shape))]
    element = ','.join(index_names)
    state.add_mapped_tasklet(
        name=func,
        map_ranges={
            idx: '0:%s' % extent
            for idx, extent in zip(index_names, source_desc.shape)
        },
        inputs={'__inp': Memlet.simple(inpname, element)},
        code=call_code,
        outputs={'__out': Memlet.simple(outname, element)},
        external_edges=True)

    return outname
Ejemplo n.º 24
0
def _binop(sdfg: SDFG, state: SDFGState, op1: str, op2: str, opcode: str,
           opname: str, restype: dace.typeclass):
    """ Implements a general element-wise array binary operator.

    The operand shapes are combined through ``_broadcast_together``; the
    result is written to a new temporary transient that uses the storage of
    the first operand.

    :param sdfg: SDFG that owns both operands and receives the transient.
    :param state: State in which the mapped tasklet is created.
    :param op1: Name of the left-hand operand in ``sdfg.arrays``.
    :param op2: Name of the right-hand operand in ``sdfg.arrays``.
    :param opcode: Operator text placed between the operands in the tasklet.
    :param opname: Operator name used to build the tasklet name.
    :param restype: Data type of the result transient.
    :return: Name of the transient that holds the result.
    """
    lhs_desc = sdfg.arrays[op1]
    rhs_desc = sdfg.arrays[op2]

    broadcast = _broadcast_together(lhs_desc.shape, rhs_desc.shape)
    out_shape, all_idx_dict, all_idx, arr1_idx, arr2_idx = broadcast

    result_name, _ = sdfg.add_temp_transient(out_shape, restype,
                                             lhs_desc.storage)
    operand_memlets = {
        '__in1': Memlet.simple(op1, arr1_idx),
        '__in2': Memlet.simple(op2, arr2_idx),
    }
    result_memlets = {'__out': Memlet.simple(result_name, all_idx)}
    state.add_mapped_tasklet("_%s_" % opname,
                             all_idx_dict,
                             operand_memlets,
                             '__out = __in1 %s __in2' % opcode,
                             result_memlets,
                             external_edges=True)
    return result_name
Ejemplo n.º 25
0
def _unop(sdfg: SDFG, state: SDFGState, op1: str, opcode: str, opname: str):
    """ Implements a general element-wise array unary operator.

    The result is written to a new temporary transient with the same shape,
    dtype and storage as the operand.

    :param sdfg: SDFG that owns the operand and receives the transient.
    :param state: State in which the mapped tasklet is created.
    :param op1: Name of the operand in ``sdfg.arrays``.
    :param opcode: Operator text placed before the operand in the tasklet.
    :param opname: Operator name used to build the tasklet name.
    :return: Name of the transient that holds the result.
    """
    operand = sdfg.arrays[op1]
    result_name, _ = sdfg.add_temp_transient(operand.shape, operand.dtype,
                                             operand.storage)

    # One map parameter per dimension; input and output use the same index.
    index_names = ['__i%d' % dim for dim in range(len(operand.shape))]
    element = ','.join(index_names)
    iteration_space = {
        idx: '0:%s' % extent
        for idx, extent in zip(index_names, operand.shape)
    }

    state.add_mapped_tasklet("_%s_" % opname,
                             iteration_space,
                             {'__in1': Memlet.simple(op1, element)},
                             '__out = %s __in1' % opcode,
                             {'__out': Memlet.simple(result_name, element)},
                             external_edges=True)
    return result_name
Ejemplo n.º 26
0
def test():
    """Run the multidimensional offset-and-stride copy test.

    Builds an SDFG whose map (``i`` in ``1:4``, ``j`` in ``1:3``) copies
    elements of array ``A`` (offset ``[2, 3]``, strides ``[N, 1]``) into
    array ``B`` (offset ``[-1, -1]``, strides ``[3, 1]``), executes it on
    random data, and asserts that ``output[0:3, 0:2]`` matches
    ``input[3:6, 4:6]``.
    """
    print('Multidimensional offset and stride test')
    # Externals (parameters, symbols)
    N = dp.symbol('N')
    N.set(20)
    source = dp.ndarray([N, N], dp.float32)
    target = dp.ndarray([4, 3], dp.float32)
    side = N.get()
    source[:] = (np.random.rand(side, side) * 5).astype(dp.float32.type)
    target[:] = dp.float32(0)

    # Construct SDFG
    mysdfg = SDFG('offset_stride')
    state = mysdfg.add_state()
    a_node = state.add_array('A', [6, 6],
                             dp.float32,
                             offset=[2, 3],
                             strides=[N, 1],
                             total_size=N * N)
    b_node = state.add_array('B', [3, 2],
                             dp.float32,
                             offset=[-1, -1],
                             strides=[3, 1],
                             total_size=12)

    entry, exit_ = state.add_map('mymap', [('i', '1:4'), ('j', '1:3')])
    copy_tasklet = state.add_tasklet('mytasklet', {'a'}, {'b'}, 'b = a')

    # Inner edges: one element per map iteration.
    state.add_edge(entry, None, copy_tasklet, 'a',
                   Memlet.simple(a_node, 'i,j'))
    state.add_edge(copy_tasklet, 'b', exit_, None,
                   Memlet.simple(b_node, 'i,j'))

    # Add outer edges
    state.add_edge(a_node, None, entry, None,
                   Memlet.simple(a_node, '1:4,1:3'))
    state.add_edge(exit_, None, b_node, None,
                   Memlet.simple(b_node, '1:4,1:3'))

    mysdfg(A=source, B=target, N=N)

    mismatch = target[0:3, 0:2] - source[3:6, 4:6]
    diff = np.linalg.norm(mismatch) / N.get()
    print("Difference:", diff)
    assert diff <= 1e-5
Ejemplo n.º 27
0
def make_sdfg(dtype):
    """Build the ``mpi_allreduce`` SDFG.

    A single ``dataflow`` state connects the non-transient array ``inbuf``
    to an ``Allreduce`` library node and the node to the non-transient array
    ``outbuf``. Both arrays have symbolic length ``n``.

    :param dtype: Element type of both buffers.
    :return: The constructed SDFG.
    """
    n = dace.symbol("n")

    sdfg = dace.SDFG("mpi_allreduce")
    state = sdfg.add_state("dataflow")

    for buffer_name in ("inbuf", "outbuf"):
        sdfg.add_array(buffer_name, [n], dtype, transient=False)
    source = state.add_access("inbuf")
    sink = state.add_access("outbuf")
    library_node = mpi.nodes.allreduce.Allreduce("allreduce")

    # Input buffer -> library node.
    incoming = Memlet.simple(source, "0:n", num_accesses=n)
    state.add_memlet_path(source,
                          library_node,
                          dst_conn="_inbuffer",
                          memlet=incoming)

    # Library node -> output buffer.
    outgoing = Memlet.simple(sink, "0:n", num_accesses=n)
    state.add_memlet_path(library_node,
                          sink,
                          src_conn="_outbuffer",
                          memlet=outgoing)

    return sdfg
Ejemplo n.º 28
0
def nccl_reduce(pv: 'ProgramVisitor',
                sdfg: SDFG,
                state: SDFGState,
                redfunction: Callable[[Any, Any], Any],
                in_buffer: str,
                out_buffer: Union[str, None] = None,
                root: str = None,
                group_handle: str = None):
    """Insert a ``Reduce`` library node into ``state`` and wire its buffers.

    :param pv: Program visitor, forwarded to ``_define_local_scalar``.
    :param sdfg: SDFG whose arrays are consulted for the group handle.
    :param state: State that receives the access nodes and edges.
    :param redfunction: Reduction function, passed to the node as ``wcr``.
    :param in_buffer: Name of the input data container.
    :param out_buffer: Name of the output data container; when ``None`` the
                       input container is also used as output (in-place).
    :param root: Passed through to the library node as ``root``.
    :param group_handle: Optional group-handle name. If it is a string that
                         names an existing entry of ``sdfg.arrays``, the
                         handle is both read and written by the node;
                         otherwise a new local scalar is defined and only
                         written.
    :return: An empty list.
    """
    uses_group = isinstance(group_handle, str)
    in_connectors = {"_inbuffer"}
    out_connectors = {"_outbuffer"}

    handle_read = None
    if uses_group:
        if group_handle in sdfg.arrays.keys():
            # Existing handle: the node reads and rewrites it.
            handle_name = group_handle
            handle_write = state.add_access(handle_name)
            handle_read = state.add_access(handle_name)
            in_connectors.add("_group_handle")
        else:
            # No such container yet: create one that the node only writes.
            handle_name = _define_local_scalar(pv, sdfg, state, dace.int32,
                                               dtypes.StorageType.GPU_Global)
            handle_write = state.add_access(handle_name)
        out_connectors.add("_group_handle")

    libnode = Reduce(inputs=in_connectors,
                     outputs=out_connectors,
                     wcr=redfunction,
                     root=root)

    if uses_group:
        # The same memlet object is attached to both handle edges.
        handle_memlet = Memlet.simple(handle_name, '0')
        if handle_read is not None:
            state.add_edge(handle_read, None, libnode, "_group_handle",
                           handle_memlet)
        state.add_edge(libnode, "_group_handle", handle_write, None,
                       handle_memlet)

    # If out_buffer is not specified, the operation will be in-place.
    target_buffer = in_buffer if out_buffer is None else out_buffer

    # Add nodes
    read_node = state.add_read(in_buffer)
    write_node = state.add_write(target_buffer)

    # Connect nodes
    state.add_edge(read_node, None, libnode, '_inbuffer', Memlet(in_buffer))
    state.add_edge(libnode, '_outbuffer', write_node, None,
                   Memlet(target_buffer))

    return []
Ejemplo n.º 29
0
def make_sdfg(dtype):
    """Build the ``mpi_allgather`` SDFG.

    A single ``dataflow`` state connects the non-transient array ``inA``
    (length ``n``) to an ``Allgather`` library node and the node to the
    non-transient array ``outA`` (length ``n * p``).

    :param dtype: Element type of both arrays.
    :return: The constructed SDFG.
    """
    n = dace.symbol("n")
    p = dace.symbol("p")

    sdfg = dace.SDFG("mpi_allgather")
    state = sdfg.add_state("dataflow")

    sdfg.add_array("inA", [n], dtype, transient=False)
    sdfg.add_array("outA", [n * p], dtype, transient=False)
    source = state.add_access("inA")
    sink = state.add_access("outA")
    library_node = mpi.nodes.allgather.Allgather("allgather")

    # Local contribution -> library node.
    incoming = Memlet.simple(source, "0:n", num_accesses=n)
    state.add_memlet_path(source,
                          library_node,
                          dst_conn="_inbuffer",
                          memlet=incoming)

    # Library node -> gathered result.
    # NOTE(review): num_accesses is 1 here although the subset spans
    # 0:n*p -- confirm whether this is intentional.
    outgoing = Memlet.simple(sink, "0:n*p", num_accesses=1)
    state.add_memlet_path(library_node,
                          sink,
                          src_conn="_outbuffer",
                          memlet=outgoing)

    return sdfg
Ejemplo n.º 30
0
def test():
    """Run the multiple-tasklet conflict-resolution test.

    Builds an SDFG with one map over ``i`` in ``0:N`` that feeds two
    tasklets: one accumulates the elements of ``A`` into ``s`` with an
    addition write-conflict resolution, the other accumulates them into
    ``p`` with a multiplication one. With ``N = 20``, every input element
    equal to 5, ``s`` starting at 0 and ``p`` starting at 1, the expected
    results are ``5 * 20`` and ``5**20``.

    :raises AssertionError: If either result deviates from the expected
                            value in either direction.
    """
    print('SDFG multiple tasklet test')
    # Externals (parameters, symbols)
    N = dp.symbol('N')
    N.set(20)
    # Locals deliberately avoid the builtin names `input` and `sum`.
    values = dp.ndarray([N], dp.int64)
    total = dp.ndarray([1], dp.int64)
    product = dp.ndarray([1], dp.int64)
    values[:] = dp.int64(5)
    total[:] = dp.int64(0)
    product[:] = dp.int64(1)

    # Construct SDFG
    mysdfg = SDFG('multiple_cr')
    state = mysdfg.add_state()
    A = state.add_array('A', [N], dp.int64)
    s = state.add_array('s', [1], dp.int64)
    p = state.add_array('p', [1], dp.int64)

    map_entry, map_exit = state.add_map('mymap', dict(i='0:N'))
    state.add_edge(A, None, map_entry, None, Memlet.simple(A, '0:N'))

    # Tasklet 1
    t1 = state.add_tasklet('task1', {'a'}, {'b'}, 'b = a')
    state.add_edge(map_entry, None, t1, 'a', Memlet.simple(A, 'i'))
    state.add_edge(t1, 'b', map_exit, None,
                   Memlet.simple(s, '0', wcr_str='lambda a,b: a+b'))
    state.add_edge(map_exit, None, s, None, Memlet.simple(s, '0'))

    # Tasklet 2
    t2 = state.add_tasklet('task2', {'a'}, {'b'}, 'b = a')
    state.add_edge(map_entry, None, t2, 'a', Memlet.simple(A, 'i'))
    state.add_edge(t2, 'b', map_exit, None,
                   Memlet.simple(p, '0', wcr_str='lambda a,b: a*b'))
    state.add_edge(map_exit, None, p, None, Memlet.simple(p, '0'))

    mysdfg(A=values, s=total, p=product, N=N)

    diff_sum = 5 * 20 - total[0]
    diff_prod = 5**20 - product[0]
    print("Difference:", diff_sum, '(sum)', diff_prod, '(product)')
    # Compare magnitudes: a signed `diff <= tol` check would let a result
    # that is too large (negative difference) pass unnoticed.
    assert abs(diff_sum) <= 1e-5 and abs(diff_prod) <= 1e-5