Ejemplo n.º 1
0
def create_test_sdfg():
    """Build a two-state SDFG that max-reduces ``BETA`` into ``BETA_MAX``.

    The ``init`` state is handed to ``create_zero_initialization`` for
    ``BETA_MAX`` (helper defined elsewhere; presumably it zero-fills the
    array). The ``compute`` state holds a reduce node over axis 0 with the
    ``'CUDA (device)'`` implementation.

    :return: The constructed SDFG.
    """
    graph = dace.SDFG('test_sdfg')
    graph.add_array('BETA', shape=[10], dtype=dace.float32)
    graph.add_array('BETA_MAX', shape=[1], dtype=dace.float32)

    init = graph.add_state("init")
    compute = graph.add_state("compute")
    graph.add_edge(init, compute, dace.InterstateEdge())

    create_zero_initialization(init, 'BETA_MAX')

    result_node = compute.add_access('BETA_MAX')
    input_node = compute.add_access('BETA')

    reduction = compute.add_reduce(wcr="lambda a, b: max(a, b)",
                                   axes=(0, ),
                                   identity=-999999)
    reduction.implementation = 'CUDA (device)'

    # Wire BETA -> reduce -> BETA_MAX
    make_memlet = dace.memlet.Memlet.simple
    compute.add_edge(input_node, None, reduction, None,
                     make_memlet(input_node.data, '0:10'))
    compute.add_edge(reduction, None, result_node, None,
                     make_memlet(result_node.data, '0:1'))

    return graph
Ejemplo n.º 2
0
def test_allocation_static():
    """
    Check that a transient sized by symbol ``N`` can be allocated once
    constant propagation has resolved ``N`` from an interstate assignment.
    """
    sdfg = dace.SDFG('cprop_static_alloc')
    N = dace.symbol('N', dace.int32)
    sdfg.add_symbol('N', dace.int32)
    sdfg.add_array('tmp', [N], dace.int32, transient=True)
    sdfg.add_array('output', [1], dace.int32)

    entry = sdfg.add_state()
    producer_state = sdfg.add_state()
    consumer_state = sdfg.add_state_after(producer_state)

    # Entering the producer state fixes N=1; the state then writes 2 to tmp
    sdfg.add_edge(entry, producer_state,
                  dace.InterstateEdge(assignments={'N': 1}))
    producer = producer_state.add_tasklet('somecode', {}, {'out'}, 'out = 2')
    tmp_node = producer_state.add_write('tmp')
    producer_state.add_edge(producer, 'out', tmp_node, None,
                            dace.Memlet('tmp'))

    # Last state copies tmp[0] to the output array
    tmp_read = consumer_state.add_read('tmp')
    out_write = consumer_state.add_write('output')
    consumer_state.add_nedge(tmp_read, out_write, dace.Memlet('tmp[0]'))

    # Only constant propagation is applied (no scalar-to-symbol promotion)
    ConstantPropagation().apply_pass(sdfg, {})

    assert not len(sdfg.symbols)

    val = np.random.rand(1).astype(np.int32)
    sdfg(output=val)
    assert np.allclose(val, 2)
Ejemplo n.º 3
0
def test_two_to_one_cc_fusion():
    """
    Fuse a state holding two connected components into a following state
    holding one. Exactly one strict StateFusion application is expected.
    """
    sdfg = dace.SDFG('state_fusion_test')
    for array_name in ('A', 'B', 'C'):
        sdfg.add_array(array_name, [1], dace.int32)
    first = sdfg.add_state()
    second = sdfg.add_state()
    sdfg.add_edge(first, second, dace.InterstateEdge())

    # First state, component 1: constant -> A
    producer_a = first.add_tasklet('one', {}, {'a'}, 'a = 1')
    first.add_edge(producer_a, 'a', first.add_write('A'), None,
                   dace.Memlet('A'))

    # First state, component 2: constants -> B and C
    producer_bc = first.add_tasklet('two', {}, {'b', 'c'}, 'b = 2; c = 3')
    for conn, target in (('b', 'B'), ('c', 'C')):
        first.add_edge(producer_bc, conn, first.add_write(target), None,
                       dace.Memlet(target))

    # Second state: one tasklet reads A, B and C and writes C
    consumer = second.add_tasklet('three', {'a', 'b', 'c'}, {'out'},
                                  'out = a+b+c')
    for conn, source in (('a', 'A'), ('b', 'B'), ('c', 'C')):
        second.add_edge(second.add_read(source), None, consumer, conn,
                        dace.Memlet(source))
    second.add_edge(consumer, 'out', second.add_write('C'), None,
                    dace.Memlet('C'))

    num_applied = sdfg.apply_transformations_repeated(StateFusion, strict=True)
    assert num_applied == 1
Ejemplo n.º 4
0
def make_sdfg(specialized):
    """Assemble the three-stage SDFG: copy to FPGA, compute, copy to host.

    :param specialized: If truthy, the SDFG name embeds the current values of
                        ``N``, ``K`` and ``M``; otherwise only ``M`` is
                        embedded and ``N``/``K`` appear literally in the name.
    :return: The constructed SDFG.
    """
    if specialized:
        name = "mm_fpga_stream_{}x{}x{}".format(N.get(), K.get(), M.get())
    else:
        name = "mm_fpga_stream_NxKx{}".format(M.get())
    sdfg = dace.SDFG(name)

    # Stage builders are defined elsewhere in the file; they are called in
    # pipeline order and chained with unconditional transitions.
    stages = [
        make_copy_to_fpga_state(sdfg),
        make_fpga_state(sdfg),
        make_copy_to_host_state(sdfg),
    ]
    for src, dst in zip(stages, stages[1:]):
        sdfg.add_edge(src, dst, dace.InterstateEdge())

    return sdfg
Ejemplo n.º 5
0
def test_two_cc_fusion_together():
    """
    Both states hold two connected components each; StateFusion is expected
    to apply exactly once.
    """
    sdfg = dace.SDFG('state_fusion_test')
    for array_name in ('A', 'B', 'C'):
        sdfg.add_array(array_name, [1], dace.int32)
    first = sdfg.add_state()
    second = sdfg.add_state()
    sdfg.add_edge(first, second, dace.InterstateEdge())

    # First state, component 1: constant -> A
    producer_a = first.add_tasklet('one', {}, {'a'}, 'a = 1')
    first.add_edge(producer_a, 'a', first.add_write('A'), None,
                   dace.Memlet('A'))

    # First state, component 2: constants -> B and C
    producer_bc = first.add_tasklet('two', {}, {'b', 'c'}, 'b = 2; c = 3')
    for conn, target in (('b', 'B'), ('c', 'C')):
        first.add_edge(producer_bc, conn, first.add_write(target), None,
                       dace.Memlet(target))

    # Second state, component 1: B feeds an empty tasklet
    b_read = second.add_read('B')
    sink = second.add_tasklet('one', {'a'}, {}, '')
    second.add_edge(b_read, None, sink, 'a', dace.Memlet('B'))

    # Second state, component 2: A and C are read and both rewritten
    mixer = second.add_tasklet('two', {'b', 'c'}, {'d', 'e'},
                               'd = b + c; e = b')
    for conn, source in (('b', 'A'), ('c', 'C')):
        second.add_edge(second.add_read(source), None, mixer, conn,
                        dace.Memlet(source))
    for conn, target in (('d', 'A'), ('e', 'C')):
        second.add_edge(mixer, conn, second.add_write(target), None,
                        dace.Memlet(target))

    num_applied = sdfg.apply_transformations_repeated(StateFusion)
    assert num_applied == 1
Ejemplo n.º 6
0
    def test_nested_transient(self):
        """
        Apply MapFission to a map whose body is a nested SDFG with an
        internal transient, then check that the result equals ``A * 6``.
        """

        # Inner SDFG: a -> t (times 2) in one state, t -> b (times 3) in the next
        inner = dace.SDFG('nested')
        inner.add_array('a', [1], dace.float64)
        inner.add_array('b', [1], dace.float64)
        inner.add_transient('t', [1], dace.float64)

        # a -> t state
        double_state = inner.add_state()
        src = double_state.add_read('a')
        doubler = double_state.add_tasklet('t1', {'inp'}, {'out'},
                                           'out = 2*inp')
        dst = double_state.add_write('t')
        double_state.add_edge(src, None, doubler, 'inp',
                              dace.Memlet.simple('a', '0'))
        double_state.add_edge(doubler, 'out', dst, None,
                              dace.Memlet.simple('t', '0'))

        # t -> b state
        triple_state = inner.add_state()
        src = triple_state.add_read('t')
        tripler = triple_state.add_tasklet('t2', {'inp'}, {'out'},
                                           'out = 3*inp')
        dst = triple_state.add_write('b')
        triple_state.add_edge(src, None, tripler, 'inp',
                              dace.Memlet.simple('t', '0'))
        triple_state.add_edge(tripler, 'out', dst, None,
                              dace.Memlet.simple('b', '0'))

        inner.add_edge(double_state, triple_state, dace.InterstateEdge())

        # Outer SDFG: map over both elements of A, updating A in place
        sdfg = dace.SDFG('nested_transient_fission')
        sdfg.add_array('A', [2], dace.float64)
        state = sdfg.add_state()
        a_in = state.add_read('A')
        a_out = state.add_write('A')
        map_entry, map_exit = state.add_map('outer', dict(i='0:2'))
        nested_node = state.add_nested_sdfg(inner, None, {'a'}, {'b'})
        state.add_memlet_path(a_in,
                              map_entry,
                              nested_node,
                              dst_conn='a',
                              memlet=dace.Memlet.simple('A', 'i'))
        state.add_memlet_path(nested_node,
                              map_exit,
                              a_out,
                              src_conn='b',
                              memlet=dace.Memlet.simple('A', 'i'))

        num_applied = sdfg.apply_transformations(MapFission)
        self.assertGreater(num_applied, 0)

        # Run and compare against 2 * 3 * A
        A = np.random.rand(2)
        expected = A * 6
        sdfg(A=A)
        self.assertTrue(np.allclose(A, expected))
Ejemplo n.º 7
0
def test_allocation_varying(parametric):
    """
    Allocate one transient while ``N`` is 1, then a second one after ``N`` is
    reassigned, and check that constant propagation leaves ``N`` as a symbol.

    :param parametric: If truthy, the second assignment reads ``tmp1[0]``;
                       otherwise it assigns the literal 2.
    """
    sdfg = dace.SDFG(f'cprop_alloc_{parametric}')
    N = dace.symbol('N', dace.int32)
    sdfg.add_symbol('N', dace.int32)
    for transient_name in ('tmp1', 'tmp2'):
        sdfg.add_array(transient_name, [N], dace.int32, transient=True)
    sdfg.add_array('output', [1], dace.int32)

    entry = sdfg.add_state()
    small_state = sdfg.add_state()
    large_state = sdfg.add_state()

    # Entering the first compute state with N=1; it writes 2 into tmp1[0]
    sdfg.add_edge(entry, small_state,
                  dace.InterstateEdge(assignments={'N': 1}))
    seed = small_state.add_tasklet('somecode', {}, {'out'}, 'out = 2')
    tmp1_node = small_state.add_write('tmp1')
    small_state.add_edge(seed, 'out', tmp1_node, None,
                         dace.Memlet('tmp1[0]'))

    # Entering the second compute state with N=tmp1[0] (=2) or the literal 2
    new_size = 'tmp1[0]' if parametric else 2
    sdfg.add_edge(small_state, large_state,
                  dace.InterstateEdge(assignments={'N': new_size}))
    write_three = large_state.add_tasklet('somecode2', {}, {'out'}, 'out = 3')
    write_four = large_state.add_tasklet('somecode2', {}, {'out'}, 'out = 4')
    tmp2_node = large_state.add_write('tmp2')
    large_state.add_edge(write_three, 'out', tmp2_node, None,
                         dace.Memlet('tmp2[0]'))
    large_state.add_edge(write_four, 'out', tmp2_node, None,
                         dace.Memlet('tmp2[1]'))

    # Same state forwards tmp2[1] to the output array
    output_node = large_state.add_write('output')
    large_state.add_nedge(tmp2_node, output_node, dace.Memlet('tmp2[1]'))

    # Only constant propagation is applied (no scalar-to-symbol promotion)
    ConstantPropagation().apply_pass(sdfg, {})

    assert len(sdfg.symbols) == 1

    val = np.random.rand(1).astype(np.int32)
    sdfg(output=val)
    assert np.allclose(val, 4)
Ejemplo n.º 8
0
def test_nested_promotion_connector(with_subscript):
    """
    Promote a scalar that is passed into a nested SDFG through a connector
    and used there in an interstate assignment.

    :param with_subscript: If truthy, the inner assignment reads ``s[0]``;
                           otherwise it reads ``s``.
    """
    # Outer SDFG: an init state sets scal=5, followed by the nested call
    postfix = 'b' if with_subscript else 'a'
    sdfg = dace.SDFG('testprog14{}'.format(postfix))
    sdfg.add_array('A', [20, 20], dace.float64)
    sdfg.add_array('B', [1], dace.float64)
    sdfg.add_transient('scal', [1], dace.int32)
    initstate = sdfg.add_state()
    setter = initstate.add_tasklet('do', {}, {'out'}, 'out = 5')
    scal_write = initstate.add_write('scal')
    initstate.add_edge(setter, 'out', scal_write, None, dace.Memlet('scal'))
    state = sdfg.add_state_after(initstate)

    # Inner SDFG: s2 is assigned from s on the edge, then b = a[s2, s2 + 1]
    nsdfg = dace.SDFG('nested')
    nsdfg.add_array('a', [20, 20], dace.float64)
    nsdfg.add_array('b', [1], dace.float64)
    nsdfg.add_array('s', [1], dace.int32)
    nsdfg.add_symbol('s2', dace.int32)
    inner_first = nsdfg.add_state()
    inner_second = nsdfg.add_state()
    s2_source = 's[0]' if with_subscript else 's'
    nsdfg.add_edge(inner_first, inner_second,
                   dace.InterstateEdge(assignments={'s2': s2_source}))
    inner_a = inner_second.add_read('a')
    copier = inner_second.add_tasklet('do', {'inp'}, {'out'}, 'out = inp')
    inner_b = inner_second.add_write('b')
    inner_second.add_edge(inner_a, None, copier, 'inp',
                          dace.Memlet('a[s2, s2 + 1]'))
    inner_second.add_edge(copier, 'out', inner_b, None, dace.Memlet('b[0]'))

    # Connect the nested SDFG node to the outer arrays
    nnode = state.add_nested_sdfg(nsdfg, None, {'a', 's'}, {'b'})
    outer_a = state.add_read('A')
    outer_s = state.add_read('scal')
    outer_b = state.add_write('B')
    state.add_edge(outer_a, None, nnode, 'a', dace.Memlet('A'))
    state.add_edge(outer_s, None, nnode, 's', dace.Memlet('scal'))
    state.add_edge(nnode, 'b', outer_b, None, dace.Memlet('B'))

    # Promotion
    promotable = scalar_to_symbol.find_promotable_scalars(sdfg)
    assert promotable == {'scal'}
    scalar_to_symbol.promote_scalars_to_symbols(sdfg)
    sdfg.coarsen_dataflow()

    # Expect one state with three nodes and no nested SDFG left
    assert sdfg.number_of_nodes() == 1
    only_state = sdfg.node(0)
    assert only_state.number_of_nodes() == 3
    assert not any(
        isinstance(node, dace.nodes.NestedSDFG) for node in sdfg.node(0))

    # Correctness: scal == 5, so B[0] must equal A[5, 6]
    A = np.random.rand(20, 20)
    B = np.random.rand(1)
    sdfg(A=A, B=B)
    assert B[0] == A[5, 6]
Ejemplo n.º 9
0
def test_sub_grid():
    """
    Create a ``[1, P]`` Cartesian process grid, take a sub-grid of it, and
    query that sub-grid with ``MPI_Cart_get`` from a C++ tasklet.

    :raises ValueError: If run with fewer than two MPI processes.
    """
    P = dace.symbol('P', dace.int32)

    sdfg = dace.SDFG("sub_grid_test")
    sdfg.add_symbol('P', dace.int32)
    _, dims_desc = sdfg.add_array("dims", (1, ), dtype=dace.int32)
    _, periods_desc = sdfg.add_array("periods", (1, ), dtype=dace.int32)
    _, coords_desc = sdfg.add_array("coords", (1, ), dtype=dace.int32)
    sdfg.add_array("valid", (1, ), dtype=dace.bool_)

    # First state: create the parent grid and derive the sub-grid
    grid_state = sdfg.add_state("start")
    parent_pgrid_name = comm._cart_create(None, sdfg, grid_state, [1, P])
    pgrid_name = comm._cart_sub(None, sdfg, grid_state, parent_pgrid_name,
                                [False, True])

    # Second state: query the sub-grid communicator
    query_state = sdfg.add_state("main")
    sdfg.add_edge(grid_state, query_state, dace.InterstateEdge())
    query = query_state.add_tasklet(
        "MPI_Cart_get", {}, {'d', 'p', 'c', 'v'},
        f"MPI_Cart_get(__state->{pgrid_name}_comm, P, &d, &p, &c);\nv = __state->{pgrid_name}_valid;",
        dtypes.Language.CPP)
    dims_node = query_state.add_write("dims")
    periods_node = query_state.add_write("periods")
    coords_node = query_state.add_write("coords")
    valid_node = query_state.add_write("valid")
    query_state.add_edge(query, 'd', dims_node, None,
                         dace.Memlet.from_array("dims", dims_desc))
    query_state.add_edge(query, 'p', periods_node, None,
                         dace.Memlet.from_array("periods", periods_desc))
    query_state.add_edge(query, 'c', coords_node, None,
                         dace.Memlet.from_array("coords", coords_desc))
    query_state.add_edge(query, 'v', valid_node, None,
                         dace.Memlet("valid[0]"))

    from mpi4py import MPI
    commworld = MPI.COMM_WORLD
    rank = commworld.Get_rank()
    size = commworld.Get_size()

    if not size >= 2:
        raise ValueError("Please run this test with at least two processes.")

    func = utils.distributed_compile(sdfg, commworld)

    # Output buffers filled in by the compiled program
    dims_out = np.zeros((1, ), dtype=np.int32)
    periods_out = np.zeros((1, ), dtype=np.int32)
    coords_out = np.zeros((1, ), dtype=np.int32)
    valid_out = np.zeros((1, ), dtype=np.bool_)
    func(dims=dims_out,
         periods=periods_out,
         coords=coords_out,
         valid=valid_out,
         P=size)

    assert np.array_equal(dims_out, [size])
    assert np.array_equal(periods_out, [0])
    assert np.array_equal(coords_out, [rank])
    assert valid_out[0]
Ejemplo n.º 10
0
def test():
    """
    Build an SDFG in which a transient is written in one state and then read
    only by the conditions of the outgoing interstate edges.
    """
    sdfg = dace.SDFG('toplevel_interstate_test')
    _, tmp_descriptor = sdfg.add_transient('tmp', [1], dace.int32)

    # First state: tmp = 5
    set_state = sdfg.add_state()
    setter = set_state.add_tasklet('settmp', {}, {'t'}, 't = 5')
    tmp_node = set_state.add_write('tmp')
    set_state.add_edge(setter, 't', tmp_node, None,
                       dace.Memlet.from_array('tmp', tmp_descriptor))

    # Branch targets: neither touches tmp in its dataflow
    ok_state = sdfg.add_state()
    ok_state.add_tasklet('sayhi', {}, {}, 'printf("OK\\n")')
    fail_state = sdfg.add_state()
    fail_state.add_tasklet('saybye', {}, {}, 'printf("FAIL\\n")')

    # The branch taken depends on tmp[0]
    for target, condition in ((ok_state, 'tmp[0] > 2'),
                              (fail_state, 'tmp[0] <= 2')):
        sdfg.add_edge(set_state, target, dace.InterstateEdge(condition))
Ejemplo n.º 11
0
 def test_state_duplication(self):
     """Validation must reject an SDFG with two states sharing a label."""
     try:
         sdfg = dace.SDFG('ok')
         first = sdfg.add_state('also_ok')
         second = sdfg.add_state('also_ok')
         # Force the duplicate label explicitly on the second state
         second.set_label('also_ok')
         sdfg.add_edge(first, second, dace.InterstateEdge())
         sdfg.validate()
     except dace.sdfg.InvalidSDFGError as ex:
         print('Exception caught:', ex)
     else:
         self.fail('Failed to detect duplicate state')
Ejemplo n.º 12
0
def test_fuse_assignments_2():
    """
    Chain of five empty states whose interstate edges repeatedly reassign
    ``k`` (and derive ``l`` from it). StateFusion must not fuse any of them,
    so all five states are expected to remain.
    """
    sdfg = dace.SDFG('state_fusion_test')
    states = [sdfg.add_state() for _ in range(5)]

    # Assignments on the edges state1->state2, ..., state4->state5
    edge_assignments = [
        dict(k=1),
        dict(k='k + 1'),
        dict(l='k + 1'),
        dict(k='k + 1'),
    ]
    for src, dst, assignments in zip(states, states[1:], edge_assignments):
        sdfg.add_edge(src, dst, dace.InterstateEdge(assignments=assignments))

    sdfg.apply_transformations_repeated(StateFusion)
    assert sdfg.number_of_nodes() == 5
Ejemplo n.º 13
0
def test_2d_assignment():
    """
    Assign symbol ``i`` from ``A[1, 1]`` on an interstate edge and write it
    to ``A[0, 0]`` in the following state.
    """
    sdfg = dace.SDFG('assign2d')
    sdfg.add_array('A', [4, 2], dace.float64)
    source_state = sdfg.add_state()
    target_state = sdfg.add_state()

    writer = target_state.add_tasklet('assign', {}, {'a'}, 'a = i')
    a_node = target_state.add_write('A')
    target_state.add_edge(writer, 'a', a_node, None, dace.Memlet('A[0, 0]'))

    transition = dace.InterstateEdge(assignments={'i': 'A[1, 1]'})
    sdfg.add_edge(source_state, target_state, transition)

    A = np.random.rand(4, 2)
    sdfg(A=A)
    assert np.allclose(A[0, 0], A[1, 1])
Ejemplo n.º 14
0
def test_dowhile():
    """
    Do-while style loop: the body increments ``A[0]`` and the guard is only
    evaluated after the body has run. ``A[0]`` is expected to end at 4.
    """
    sdfg = dace.SDFG('dowhiletest')
    sdfg.add_array('A', [1], dace.int32)

    # Control flow: init -> body -> increment -> guard -> (body | after)
    init = sdfg.add_state()
    body = sdfg.add_state()
    sdfg.add_edge(init, body, dace.InterstateEdge(assignments={'cond': '1'}))
    increment = sdfg.add_state()
    sdfg.add_edge(body, increment,
                  dace.InterstateEdge(assignments={'cond': 'cond + 1'}))
    guard = sdfg.add_state_after(increment)
    after = sdfg.add_state()
    sdfg.add_edge(guard, body, dace.InterstateEdge('cond < 5'))
    sdfg.add_edge(guard, after, dace.InterstateEdge('cond >= 5'))

    # Loop body: A = A + 1
    adder = body.add_tasklet('something', {'a'}, {'o'}, 'o = a + 1')
    a_in = body.add_read('A')
    a_out = body.add_write('A')
    body.add_edge(a_in, None, adder, 'a', dace.Memlet('A'))
    body.add_edge(adder, 'o', a_out, None, dace.Memlet('A'))

    A = np.zeros([1], dtype=np.int32)
    sdfg(A=A)
    assert A[0] == 4
Ejemplo n.º 15
0
def test_fsm():
    """
    Finite-state machine driven by the ``nextstate`` scalar: each case state
    adds its case number to ``A`` and selects the next case. When control-flow
    detection is enabled, the generated code must contain a switch statement.
    """
    sdfg = dace.SDFG('fsmtest')
    sdfg.add_scalar('nextstate', dace.int32)
    sdfg.add_array('A', [1], dace.int32)
    start = sdfg.add_state()
    dispatch = sdfg.add_state_after(start)
    case_states = {case: sdfg.add_state() for case in (0, 1, 3, 5)}
    join = sdfg.add_state()

    # case -> value written to nextstate (7 matches no case)
    transitions = {0: 3, 3: 1, 1: 5, 5: 7}

    for case, case_state in case_states.items():
        sdfg.add_edge(dispatch, case_state,
                      dace.InterstateEdge(f'nextstate == {case}'))

        a_in = case_state.add_read('A')
        update = case_state.add_tasklet(
            'update', {'ain'}, {'a', 'nstate'},
            f'a = ain + {case}; nstate = {transitions[case]}')
        a_out = case_state.add_write('A')
        next_out = case_state.add_write('nextstate')
        case_state.add_edge(a_in, None, update, 'ain', dace.Memlet('A'))
        case_state.add_edge(update, 'a', a_out, None, dace.Memlet('A'))
        case_state.add_edge(update, 'nstate', next_out, None,
                            dace.Memlet('nextstate'))

        sdfg.add_edge(case_state, join, dace.InterstateEdge())
    # Loop back from the join state to the dispatcher
    sdfg.add_edge(join, dispatch, dace.InterstateEdge())

    A = np.array([1], dtype=np.int32)
    sdfg(A=A, nextstate=0)
    assert A[0] == 1 + 3 + 1 + 5

    if dace.Config.get_bool('optimizer', 'detect_control_flow'):
        generated = sdfg.generate_code()[0].clean_code
        assert 'switch ' in generated
Ejemplo n.º 16
0
def test_2d_access_sdfgapi():
    """
    Branch on a 2D array element (``A[1,1]``) in interstate-edge conditions
    and write +/-100 into ``A[0, 0]``. The SDFG is run once with the two
    branches left open and once after joining them in a common end state.
    """
    sdfg = dace.SDFG('access2d_sdfg')
    sdfg.add_array('A', [4, 2], dace.float64)
    entry = sdfg.add_state()
    on_true = sdfg.add_state()
    on_false = sdfg.add_state()

    for branch, code in ((on_true, 'a = 100.0'), (on_false, 'a = -100.0')):
        setter = branch.add_tasklet('assign', {}, {'a'}, code)
        branch.add_edge(setter, 'a', branch.add_write('A'), None,
                        dace.Memlet('A[0, 0]'))

    sdfg.add_edge(entry, on_true, dace.InterstateEdge('A[1,1] < 0.5'))
    sdfg.add_edge(entry, on_false, dace.InterstateEdge('A[1,1] >= 0.5'))

    # Reference result computed with numpy
    A = np.random.rand(4, 2)
    expected = A.copy()
    if expected[1, 1] < 0.5:
        expected[0, 0] = 100.0
    else:
        expected[0, 0] = -100.0

    # Without control-flow detection: branches have no common successor
    A_open = A.copy()
    compiled = sdfg.compile()
    compiled(A=A_open)
    assert np.allclose(A_open, expected)
    del compiled

    # With control-flow detection: join both branches so that an if/else
    # shows up in the generated code
    join = sdfg.add_state()
    for branch in (on_true, on_false):
        sdfg.add_edge(branch, join, dace.InterstateEdge())
    assert 'else' in sdfg.generate_code()[0].code

    compiled = sdfg.compile()
    compiled(A=A)
    assert np.allclose(A, expected)
Ejemplo n.º 17
0
def test_fuse_assignment_in_use():
    """
    Two states with an interstate assignment in between, where the assigned
    value is used in the first state. Should fail.

    ``state3`` reads symbol ``k`` in its tasklet, and the edge
    ``state3 -> state4`` reassigns ``k``, so fusing the two states is expected
    to be rejected with a ``ValueError``.
    """
    sdfg = dace.SDFG('state_fusion_test')
    sdfg.add_array('A', [2], dace.int32)
    state1, state2, state3, state4 = tuple(sdfg.add_state() for _ in range(4))
    sdfg.add_edge(state1, state2, dace.InterstateEdge(assignments=dict(k=1)))
    sdfg.add_edge(state2, state3, dace.InterstateEdge())
    sdfg.add_edge(state3, state4, dace.InterstateEdge(assignments=dict(k=2)))

    state3.add_edge(state3.add_tasklet('one', {}, {'a'}, 'a = k'), 'a',
                    state3.add_write('A'), None, dace.Memlet('A[0]'))

    # The nodes must be created in the same state that owns the edge.
    # Previously they were created in state3 and wired in state4.
    state4.add_edge(state4.add_tasklet('two', {}, {'a'}, 'a = k'), 'a',
                    state4.add_write('A'), None, dace.Memlet('A[1]'))

    try:
        StateFusion.apply_to(sdfg, first_state=state3, second_state=state4)
    except ValueError:
        print('Exception successfully caught')
    else:
        raise AssertionError('States fused, test failed')
def _make_sdfg(name, storage=dace.dtypes.StorageType.CPU_Heap):
    """
    Build a loop SDFG (``i`` from 0 while ``i < N``) whose body copies a
    window of ``A`` into the transient ``tmp1`` (its last dimension depends on
    ``i``), sum-reduces ``tmp1`` into ``tmp2``, and stores ``tmp2`` in ``B[i]``.

    :param name: Name of the SDFG.
    :param storage: Storage type of both transients. It also selects the
                    reduce implementation for GPU_Global and FPGA_Global.
    :return: The constructed SDFG.
    """
    N = dace.symbol('N', dtype=dace.int32, integer=True, positive=True)
    i = dace.symbol('i', dtype=dace.int32, integer=True)

    sdfg = dace.SDFG(name)
    sdfg.add_array('A', [N, N, N], dtype=dace.float64)
    sdfg.add_array('B', [N], dtype=dace.float64)
    _, tmp1_desc = sdfg.add_transient('tmp1', [N - 4, N - 4, N - i],
                                      dtype=dace.float64,
                                      storage=storage)
    sdfg.add_transient('tmp2', [1], dtype=dace.float64, storage=storage)

    # States
    begin = sdfg.add_state("begin", is_start_state=True)
    guard = sdfg.add_state("guard")
    copy_state = sdfg.add_state("body1")
    reduce_state = sdfg.add_state("body2")
    store_state = sdfg.add_state("body3")
    end = sdfg.add_state("end")

    # Loop skeleton: begin -> guard -> (body1 -> body2 -> body3 -> guard | end)
    sdfg.add_edge(begin, guard, dace.InterstateEdge(assignments={'i': '0'}))
    sdfg.add_edge(guard, copy_state, dace.InterstateEdge(condition=f'i<{N}'))
    sdfg.add_edge(guard, end, dace.InterstateEdge(condition=f'i>={N}'))
    sdfg.add_edge(copy_state, reduce_state, dace.InterstateEdge())
    sdfg.add_edge(reduce_state, store_state, dace.InterstateEdge())
    sdfg.add_edge(store_state, guard,
                  dace.InterstateEdge(assignments={'i': 'i+1'}))

    # body1: A window -> tmp1
    a_node = copy_state.add_read('A')
    tmp1_out = copy_state.add_write('tmp1')
    copy_state.add_nedge(a_node, tmp1_out,
                         dace.Memlet(f'A[2:{N}-2, 2:{N}-2, i:{N}]'))

    # body2: sum-reduce tmp1 -> tmp2
    tmp1_in = reduce_state.add_read('tmp1')
    reduction = standard.Reduce(wcr='lambda a, b : a + b', identity=0)
    if storage == dace.dtypes.StorageType.GPU_Global:
        reduction.implementation = 'CUDA (device)'
    elif storage == dace.dtypes.StorageType.FPGA_Global:
        reduction.implementation = 'FPGAPartialReduction'
    reduce_state.add_node(reduction)
    tmp2_out = reduce_state.add_write('tmp2')
    reduce_state.add_nedge(tmp1_in, reduction,
                           dace.Memlet.from_array('tmp1', tmp1_desc))
    reduce_state.add_nedge(reduction, tmp2_out, dace.Memlet('tmp2[0]'))

    # body3: tmp2 -> B[i]
    tmp2_in = store_state.add_read('tmp2')
    b_node = store_state.add_write('B')
    store_state.add_nedge(tmp2_in, b_node, dace.Memlet('B[i]'))

    return sdfg
Ejemplo n.º 19
0
def test_recursive_cprop():
    """
    Constant propagation must descend into a nested SDFG: ``i=1`` outside is
    mapped to ``i + 1`` inside, so the inner tasklet code must contain ``2``.
    """
    sdfg = dace.SDFG('program')
    before = sdfg.add_state()
    host_state = sdfg.add_state()
    sdfg.add_edge(before, host_state,
                  dace.InterstateEdge(assignments={'i': 1}))

    nsdfg = dace.SDFG('nested')
    host_state.add_nested_sdfg(nsdfg, None, {}, {},
                               symbol_mapping={'i': 'i + 1'})

    inner_state = nsdfg.add_state()
    printer = inner_state.add_tasklet('doprint', {}, {},
                                      'printf("%d\\n", i)')

    ConstantPropagation().apply_pass(sdfg, {})

    # No symbols may remain on either level
    for graph in (sdfg, nsdfg):
        assert len(graph.symbols) == 0
    assert '2' in printer.code.as_string
Ejemplo n.º 20
0
 def test_interstate_edge(self):
     """Validation must reject an interstate edge assigning to ``%5``."""
     try:
         sdfg = dace.SDFG('ok')
         state = sdfg.add_state('also_ok', is_start_state=True)
         source = state.add_array('A', [1], dace.float32)
         target = state.add_array('B', [1], dace.float32)
         copier = state.add_tasklet('tasklet', {'a'}, {'b'}, 'b = a')
         in_memlet = dace.Memlet.from_array(source.data, source.desc(sdfg))
         state.add_edge(source, None, copier, 'a', in_memlet)
         out_memlet = dace.Memlet.from_array(target.data, target.desc(sdfg))
         state.add_edge(copier, 'b', target, None, out_memlet)
         # Self-loop whose assignment target is not a valid symbol name
         bad_edge = dace.InterstateEdge(assignments={'%5': '1'})
         sdfg.add_edge(state, state, bad_edge)
         sdfg.validate()
     except dace.sdfg.InvalidSDFGInterstateEdgeError as ex:
         print('Exception caught:', ex)
     else:
         self.fail('Failed to detect invalid interstate edge')
Ejemplo n.º 21
0
def test_sae_scalar():
    """
    StateAssignElimination must not apply when the interstate assignment
    reads a scalar data container (``s2 = scal``).
    """
    # Init state sets scal = 5
    sdfg = dace.SDFG('state_assign_elimination_test')
    sdfg.add_array('A', [20, 20], dace.float64)
    sdfg.add_array('B', [1], dace.float64)
    sdfg.add_scalar('scal', dace.int32, transient=True)
    initstate = sdfg.add_state()
    setter = initstate.add_tasklet('do', {}, {'out'}, 'out = 5')
    scal_node = initstate.add_write('scal')
    initstate.add_edge(setter, 'out', scal_node, None, dace.Memlet('scal'))

    # Second state copies A[s2, s2 + 1] into B[0]
    state = sdfg.add_state()
    sdfg.add_edge(initstate, state,
                  dace.InterstateEdge(assignments={'s2': 'scal'}))
    a_node = state.add_read('A')
    copier = state.add_tasklet('do', {'inp'}, {'out'}, 'out = inp')
    b_node = state.add_write('B')
    state.add_edge(a_node, None, copier, 'inp', dace.Memlet('A[s2, s2 + 1]'))
    state.add_edge(copier, 'out', b_node, None, dace.Memlet('B[0]'))

    num_applied = sdfg.apply_transformations(StateAssignElimination)
    assert num_applied == 0
Ejemplo n.º 22
0
def test_one_to_two_cc_fusion():
    """
    Fuse a state with one connected component into a following state with
    two. StateFusion is expected to apply exactly once.
    """
    sdfg = dace.SDFG('state_fusion_test')
    for array_name in ('A', 'B'):
        sdfg.add_array(array_name, [1], dace.int32)
    first = sdfg.add_state()
    second = sdfg.add_state()
    sdfg.add_edge(first, second, dace.InterstateEdge())

    # First state: a single tasklet writes both arrays
    producer = first.add_tasklet('one', {}, {'a', 'b'}, 'a = 1; b = 2')
    for conn, target in (('a', 'A'), ('b', 'B')):
        first.add_edge(producer, conn, first.add_write(target), None,
                       dace.Memlet(target))

    # Second state: each array feeds its own empty tasklet
    for label, conn, source in (('one', 'a', 'A'), ('two', 'b', 'B')):
        reader = second.add_read(source)
        sink = second.add_tasklet(label, {conn}, {}, '')
        second.add_edge(reader, None, sink, conn, dace.Memlet(source))

    num_applied = sdfg.apply_transformations_repeated(StateFusion)
    assert num_applied == 1
Ejemplo n.º 23
0
def test_dse_unconditional():
    """
    With three branches out of the start state, one of which has an
    always-true condition, DeadStateElimination must keep only the start
    state, the always-taken branch and the end state.
    """
    sdfg = dace.SDFG('dse_tester')
    sdfg.add_symbol('a', dace.int32)
    start, positive, always, negative, end = (sdfg.add_state()
                                              for _ in range(5))

    branches = (
        (positive, 'a > 0'),
        (always, 'a >= a'),  # Always True
        (negative, 'a < 0'),
    )
    for target, condition in branches:
        sdfg.add_edge(start, target, dace.InterstateEdge(condition))
    for target, _ in branches:
        sdfg.add_edge(target, end, dace.InterstateEdge())

    DeadStateElimination().apply_pass(sdfg, {})
    remaining = set(sdfg.states())
    assert remaining == {start, always, end}
Ejemplo n.º 24
0
def make_sdfg(implementation,
              dtype,
              storage=dace.StorageType.Default,
              data_layout='CCC'):
    """
    Build an SDFG that multiplies ``x`` (m x k) by ``y`` (k x n) into
    ``result`` (m x n) using a MatMul library node.

    With the default storage, the three arrays are plain (non-transient)
    arguments. With any other storage type, the library node works on
    transient ``*_device`` arrays, and two extra states copy ``x``/``y`` in
    before the computation and ``result`` out after it.

    :param implementation: NOTE(review): not referenced anywhere in this
                           function; presumably the caller selects the
                           library implementation separately -- confirm.
    :param dtype: Element type of all three matrices; its ``type.__name__``
                  becomes part of the SDFG name.
    :param storage: Storage type for the arrays attached to the MatMul node.
    :param data_layout: 3-character string, one character per matrix
                        (x, y, result): 'C' for row major, anything else for
                        column major.
    :return: The constructed SDFG.
    """
    m = dace.symbol("m")
    n = dace.symbol("n")
    k = dace.symbol("k")

    # Non-default storage means the compute arrays are device-side transients
    suffix = "_device" if storage != dace.StorageType.Default else ""
    transient = storage != dace.StorageType.Default

    sdfg = dace.SDFG("mm_{}_{}".format(dtype.type.__name__, data_layout))
    state = sdfg.add_state("dataflow")

    # Data layout is a 3-character string with either C (for row major)
    # or F (for column major) matrices for x, y, and z respectively.
    xstrides = (k, 1) if data_layout[0] == 'C' else (1, m)
    ystrides = (n, 1) if data_layout[1] == 'C' else (1, k)
    zstrides = (n, 1) if data_layout[2] == 'C' else (1, m)

    sdfg.add_array("x" + suffix, [m, k],
                   dtype,
                   storage=storage,
                   transient=transient,
                   strides=xstrides)
    sdfg.add_array("y" + suffix, [k, n],
                   dtype,
                   storage=storage,
                   transient=transient,
                   strides=ystrides)
    sdfg.add_array("result" + suffix, [m, n],
                   dtype,
                   storage=storage,
                   transient=transient,
                   strides=zstrides)

    x = state.add_read("x" + suffix)
    y = state.add_read("y" + suffix)
    result = state.add_write("result" + suffix)

    node = blas.nodes.matmul.MatMul("matmul", dtype)

    # Connect the operands and the output to the library node's connectors
    state.add_memlet_path(x,
                          node,
                          dst_conn="_a",
                          memlet=Memlet.simple(x, "0:m, 0:k"))
    state.add_memlet_path(y,
                          node,
                          dst_conn="_b",
                          memlet=Memlet.simple(y, "0:k, 0:n"))
    state.add_memlet_path(node,
                          result,
                          src_conn="_c",
                          memlet=Memlet.simple(result, "0:m, 0:n"))

    if storage != dace.StorageType.Default:
        # Host-side arrays; these use the default layout, not data_layout
        sdfg.add_array("x", [m, k], dtype)
        sdfg.add_array("y", [k, n], dtype)
        sdfg.add_array("result", [m, n], dtype)

        # State that runs before the computation and copies the inputs in
        init_state = sdfg.add_state("copy_to_device")
        sdfg.add_edge(init_state, state, dace.InterstateEdge())

        x_host = init_state.add_read("x")
        y_host = init_state.add_read("y")
        x_device = init_state.add_write("x" + suffix)
        y_device = init_state.add_write("y" + suffix)
        init_state.add_memlet_path(x_host,
                                   x_device,
                                   memlet=Memlet.simple(x_host, "0:m, 0:k"))
        init_state.add_memlet_path(y_host,
                                   y_device,
                                   memlet=Memlet.simple(y_host, "0:k, 0:n"))

        # State that runs after the computation and copies the result out
        finalize_state = sdfg.add_state("copy_to_host")
        sdfg.add_edge(state, finalize_state, dace.InterstateEdge())

        # NOTE(review): the copy source is created with add_write and the
        # destination with add_read, the reverse of their roles in the path
        # below -- confirm whether this matters for the DaCe version in use.
        result_device = finalize_state.add_write("result" + suffix)
        result_host = finalize_state.add_read("result")
        finalize_state.add_memlet_path(result_device,
                                       result_host,
                                       memlet=Memlet.simple(
                                           result_device, "0:m, 0:n"))

    return sdfg
Ejemplo n.º 25
0
                      src_conn='out',
                      memlet=dace.Memlet('fpga_C[i]'))

# Add the copy-to-device state: A and B are copied into fpga_A and fpga_B,
# and the state is scheduled before `state` (defined earlier in the script).
copy_to_device = sdfg.add_state('copy_to_device')
cpu_a = copy_to_device.add_read('A')
cpu_b = copy_to_device.add_read('B')
dev_a = copy_to_device.add_write('fpga_A')
dev_b = copy_to_device.add_write('fpga_B')
# Each copy covers the range 0:N//VECLEN of the source array
copy_to_device.add_memlet_path(cpu_a,
                               dev_a,
                               memlet=dace.Memlet('A[0:N//VECLEN]'))
copy_to_device.add_memlet_path(cpu_b,
                               dev_b,
                               memlet=dace.Memlet('B[0:N//VECLEN]'))
sdfg.add_edge(copy_to_device, state, dace.InterstateEdge())

# Add the copy-to-host state: fpga_C is copied back into C after `state`
copy_to_host = sdfg.add_state('copy_to_host')
dev_c = copy_to_host.add_read('fpga_C')
cpu_c = copy_to_host.add_write('C')
copy_to_host.add_memlet_path(dev_c,
                             cpu_c,
                             memlet=dace.Memlet('C[0:N//VECLEN]'))
sdfg.add_edge(state, copy_to_host, dace.InterstateEdge())

# Validate the assembled SDFG
sdfg.validate()

######################################################################
Ejemplo n.º 26
0
# Second loop-body state; `mystate` is a helper defined earlier in the script
# (presumably it fills the state with a computation from 'B' to 'A' -- confirm).
loopstate1 = sdfg.add_state('loops1')
mystate(loopstate1, 'B', 'A')

# State reached after the loop; `endstate` is likewise defined elsewhere
state2 = sdfg.add_state('s2')
endstate(state2)

# State connection (control flow)

# Note: dataflow (arrays) CAN affect control flow assignments and conditions,
#       but not the other way around (you cannot change an interstate variable
#       inside a state). The following code works as well:
#sdfg.add_edge(state0, guard, dace.InterstateEdge(assignments=dict(k='A[0]')))

# Loop initialization (k=0)
sdfg.add_edge(state0, guard, dace.InterstateEdge(assignments=dict(k='0')))

# Loop condition (k < T / k >= T)
sdfg.add_edge(guard, loopstate0, dace.InterstateEdge('k < T'))
sdfg.add_edge(guard, state2, dace.InterstateEdge('k >= T'))

# Loop incrementation (k++)
sdfg.add_edge(loopstate1, guard,
              dace.InterstateEdge(assignments=dict(k='k+1')))

# Loop-internal interstate edges
sdfg.add_edge(loopstate0, loopstate1, dace.InterstateEdge())

# Validate correctness of initial SDFG
sdfg.validate()
Ejemplo n.º 27
0
    def Export_loop(self, multi_stage: MultiStage,
                    execution_order: ExecutionOrder):
        """
        Export a multi-stage as a sequential loop over ``k`` in ``self.sdfg``.

        Every do-method of every stage becomes one state of ``self.sdfg``
        holding a single ``StencilLib`` node. The states are chained in
        iteration order and the chain is then wrapped into a loop over ``k``
        with ``self.sdfg.add_loop``, entered from ``self.last_state_``.

        :param multi_stage: Multi-stage whose stages and do-methods are
                            exported.
        :param execution_order: Compared against
                                ``ExecutionOrder.Forward_Loop.value``: on a
                                match ``k`` counts upwards, otherwise it
                                counts downwards.
        :return: The third value returned by ``add_loop``.
        :raises ValueError: If the multi-stage contains no do-method, since
                            the loop bounds cannot be determined then.
        """
        first_state = None
        last_state = None
        # The loop bounds come from the k-interval of the do-method that was
        # visited last; track it explicitly instead of relying on the leaked
        # loop variable.
        last_do_method = None

        for stage in multi_stage.stages:
            for do_method in stage.do_methods:
                last_do_method = do_method

                read_ids = do_method.ReadIds()
                write_ids = do_method.WriteIds()
                field_ids = read_ids | write_ids
                global_ids = {
                    fid
                    for fid in field_ids if self.id_resolver.IsGlobal(fid)
                }

                # Only non-global fields are registered as (transient) arrays.
                self.TryAddArray(self.sdfg,
                                 field_ids - global_ids,
                                 transient=True)
                # self.TryAddScalar(self.sdfg, reads & globals)

                # Symbolic halo in the first four bounds, none in the last
                # two, reduced by the extents of this stage.
                halo = ClosedInterval3D(Symbol('halo'), Symbol('halo'),
                                        Symbol('halo'), Symbol('halo'), 0, 0)
                halo -= stage.extents
                bc_dict = {"btype": "shrink", "halo": halo.to_6_tuple()}
                # Every written field gets the same boundary condition.
                boundary_conditions = {
                    f'{self.Name(fid)}_out': bc_dict
                    for fid in write_ids
                }

                state = self.sdfg.add_state(str(do_method))

                stenc = StencilLib(
                    label=str(do_method),
                    shape=[I, J, 1],
                    accesses=self.Create_Variable_Access_map(
                        do_method.Reads(), '_in'),  # input fields
                    output_fields=self.Create_Variable_Access_map(
                        do_method.Writes(), '_out'),  # output fields
                    boundary_conditions=boundary_conditions,
                    code=do_method.Code())
                stenc.implementation = 'CPU'
                state.add_node(stenc)

                # Add memlet path from state.read to stencil.
                for fid, acc in do_method.read_memlets.items():
                    name = self.Name(fid)
                    dims = self.Dimensions(fid)
                    # The k-range is expressed relative to the loop variable;
                    # an empty filter result falls back to index '0'.
                    subset = ','.join(
                        dim_filter(
                            dims, '0:I', '0:J',
                            f'k+{acc.k.lower}:k+{acc.k.upper+1}')) or '0'

                    state.add_memlet_path(
                        state.add_read(name),
                        stenc,
                        memlet=dace.Memlet(f'{name}[{subset}]'),
                        dst_conn=name + '_in',
                        propagate=True)

                # Add memlet path from stencil to state.write.
                for fid, acc in do_method.write_memlets.items():
                    name = self.Name(fid)
                    dims = self.Dimensions(fid)
                    subset = ','.join(
                        dim_filter(
                            dims, '0:I', '0:J',
                            f'k+{acc.k.lower}:k+{acc.k.upper+1}')) or '0'

                    state.add_memlet_path(
                        stenc,
                        state.add_write(name),
                        memlet=dace.Memlet(f'{name}[{subset}]'),
                        src_conn=name + '_out',
                        propagate=True)

                if first_state is None:
                    first_state = state

                # Chain this state behind the previously created one.
                if last_state is not None:
                    self.sdfg.add_edge(last_state, state,
                                       dace.InterstateEdge())
                last_state = state

        if last_do_method is None:
            raise ValueError(
                'Export_loop requires a multi-stage with at least one '
                'do-method')

        k_interval = last_do_method.k_interval
        if execution_order == ExecutionOrder.Forward_Loop.value:
            initialize_expr = str(k_interval.lower)
            condition_expr = f'k < {k_interval.upper}'
            increment_expr = 'k + 1'
        else:
            initialize_expr = str(k_interval.upper - 1)
            condition_expr = f'k >= {k_interval.lower}'
            increment_expr = 'k - 1'

        print(initialize_expr, condition_expr, increment_expr)

        _, _, last_state = self.sdfg.add_loop(before_state=self.last_state_,
                                              loop_state=first_state,
                                              loop_end_state=last_state,
                                              after_state=None,
                                              loop_var='k',
                                              initialize_expr=initialize_expr,
                                              condition_expr=condition_expr,
                                              increment_expr=increment_expr)
        return last_state
Ejemplo n.º 28
0
    def Export_parallel(self, multi_stage: MultiStage):
        """
        Export a multi-stage as a map over ``k`` around a nested SDFG.

        A fresh SDFG (``ms_sdfg``) receives one state per do-method, each
        holding a single ``StencilLib`` node, chained in iteration order.
        That SDFG is nested into a new state of ``self.sdfg`` inside a map
        named ``kmap`` whose range is the k-interval of the do-method visited
        last. If ``self.last_state_`` is set, the new state is connected
        after it.

        :param multi_stage: Multi-stage whose stages and do-methods are
                            exported.
        :return: The newly created state of ``self.sdfg`` that contains the
                 map and the nested SDFG.
        :raises ValueError: If the multi-stage contains no do-method, since
                            the map range cannot be determined then.
        """
        ms_state = self.sdfg.add_state(f'ms_state_{CreateUID()}')
        ms_sdfg = dace.SDFG(f'ms_sdfg_{CreateUID()}')
        last_state = None
        # The map range comes from the do-method that was visited last; track
        # it explicitly instead of relying on the leaked loop variable.
        last_do_method = None

        for stage in multi_stage.stages:
            for do_method in stage.do_methods:
                last_do_method = do_method

                do_read_ids = do_method.ReadIds()
                do_write_ids = do_method.WriteIds()
                field_ids = do_read_ids | do_write_ids
                global_ids = {
                    fid
                    for fid in field_ids if self.id_resolver.IsGlobal(fid)
                }

                # Non-global fields are registered in the nested SDFG and, as
                # transients, in the enclosing SDFG.
                self.TryAddArray(ms_sdfg, field_ids - global_ids)
                # self.TryAddScalar(ms_sdfg, reads & globals)

                self.TryAddArray(self.sdfg,
                                 field_ids - global_ids,
                                 transient=True)
                # self.TryAddScalar(self.sdfg, reads & globals)

                # Symbolic halo in the first four bounds, none in the last
                # two, reduced by the extents of this stage.
                halo = ClosedInterval3D(Symbol('halo'), Symbol('halo'),
                                        Symbol('halo'), Symbol('halo'), 0, 0)
                halo -= stage.extents
                bc_dict = {"btype": "shrink", "halo": halo.to_6_tuple()}
                # Every written field gets the same boundary condition.
                boundary_conditions = {
                    f'{self.Name(fid)}_out': bc_dict
                    for fid in do_write_ids
                }

                state = ms_sdfg.add_state(str(do_method))

                stenc = StencilLib(
                    label=str(do_method),
                    shape=[I, J, 1],
                    accesses=self.Create_Variable_Access_map(
                        do_method.Reads(), '_in'),  # input fields
                    output_fields=self.Create_Variable_Access_map(
                        do_method.Writes(), '_out'),  # output fields
                    boundary_conditions=boundary_conditions,
                    code=do_method.Code())
                stenc.implementation = 'CPU'
                state.add_node(stenc)

                # Add memlet path from state.read to stencil.
                for fid, acc in do_method.read_memlets.items():
                    name = self.Name(fid)
                    dims = self.Dimensions(fid)
                    # An empty filter result falls back to index '0'.
                    subset = ','.join(
                        dim_filter(dims, '0:I', '0:J',
                                   HalfOpenIntervalStr(acc.k))) or '0'

                    state.add_memlet_path(
                        state.add_read(name),
                        stenc,
                        memlet=dace.Memlet(f'{name}[{subset}]'),
                        dst_conn=name + '_in',
                        propagate=True)

                # Add memlet path from stencil to state.write.
                for fid, acc in do_method.write_memlets.items():
                    name = self.Name(fid)
                    dims = self.Dimensions(fid)
                    subset = ','.join(
                        dim_filter(dims, '0:I', '0:J',
                                   HalfOpenIntervalStr(acc.k))) or '0'

                    state.add_memlet_path(
                        stenc,
                        state.add_write(name),
                        memlet=dace.Memlet(f'{name}[{subset}]'),
                        src_conn=name + '_out',
                        propagate=True)

                # set the state to be the last one to connect them
                if last_state is not None:
                    ms_sdfg.add_edge(last_state, state, dace.InterstateEdge())
                last_state = state

        if last_do_method is None:
            raise ValueError(
                'Export_parallel requires a multi-stage with at least one '
                'do-method')

        read_ids = multi_stage.ReadIds()
        write_ids = multi_stage.WriteIds()

        read_names = {self.Name(fid) for fid in read_ids}
        write_names = {self.Name(fid) for fid in write_ids}

        # Symbols forwarded one-to-one from the enclosing SDFG into the
        # nested SDFG.
        forwarded_symbols = ('halo', 'I', 'J', 'K', 'IJK_stride_I',
                             'IJK_stride_J', 'IJK_stride_K', 'IJK_total_size',
                             'IJ_stride_I', 'IJ_stride_J', 'IJ_total_size',
                             'I_total_size', 'J_total_size', 'K_total_size')
        nested_sdfg = ms_state.add_nested_sdfg(
            ms_sdfg, self.sdfg, read_names, write_names,
            {sym: dace.symbol(sym)
             for sym in forwarded_symbols})

        map_entry, map_exit = ms_state.add_map(
            "kmap", {'k': str(last_do_method.k_interval)})

        for fid, acc in multi_stage.read_memlets.items():
            if fid not in read_ids:
                continue
            name = self.Name(fid)
            dims = self.Dimensions(fid)
            # The k-range is expressed relative to the map variable.
            subset = ','.join(
                dim_filter(dims, '0:I', '0:J',
                           f'k+{acc.k.lower}:k+{acc.k.upper+1}')) or '0'

            # add the reads and the input memlet path : read -> map_entry -> nested_sdfg
            ms_state.add_memlet_path(ms_state.add_read(name),
                                     map_entry,
                                     nested_sdfg,
                                     memlet=dace.Memlet(f'{name}[{subset}]'),
                                     dst_conn=name,
                                     propagate=True)
        if len(read_ids) == 0:
            # If there are no inputs to this SDFG, connect it to the map with an empty memlet
            # to keep it in the scope.
            ms_state.add_edge(map_entry, None, nested_sdfg, None,
                              dace.memlet.Memlet())

        # output memlets
        for fid, acc in multi_stage.write_memlets.items():
            if fid not in write_ids:
                continue
            name = self.Name(fid)
            dims = self.Dimensions(fid)
            subset = ','.join(
                dim_filter(dims, '0:I', '0:J',
                           f'k+{acc.k.lower}:k+{acc.k.upper+1}')) or '0'

            # add the writes and the output memlet path : nested_sdfg -> map_exit -> write
            ms_state.add_memlet_path(nested_sdfg,
                                     map_exit,
                                     ms_state.add_write(name),
                                     memlet=dace.Memlet(f'{name}[{subset}]'),
                                     src_conn=name,
                                     propagate=True)

        # Chain the new state behind the previously exported one, if any.
        if self.last_state_ is not None:
            self.sdfg.add_edge(self.last_state_, ms_state,
                               dace.InterstateEdge())

        return ms_state
Ejemplo n.º 29
0
def make_sdfg(dtype, name="pipeline_test"):
    """
    Build an SDFG that pushes a 3D array through an FPGA pipeline scope.

    The SDFG has three chained states: ``<name>_pre`` copies the array ``a``
    into ``a_device``, ``<name>`` streams ``a_device`` through a pipeline
    scope over ``n``, ``k``, ``m`` into ``b_device``, and ``<name>_post``
    copies ``b_device`` into ``b``. The tasklet inside the pipeline adds 1
    while the pipeline's init condition holds, 3 while its drain condition
    holds, and 2 otherwise.

    :param dtype: Element type of the arrays and streams.
    :param name: Name of the SDFG; also used as the label of the compute
                 state, the pipeline and the tasklet.
    :return: The constructed SDFG.
    """

    n = dace.symbol("N")
    k = dace.symbol("K")
    m = dace.symbol("M")

    sdfg = dace.SDFG(name)

    pre_state = sdfg.add_state(name + "_pre")
    state = sdfg.add_state(name)
    post_state = sdfg.add_state(name + "_post")
    sdfg.add_edge(pre_state, state, dace.InterstateEdge())
    sdfg.add_edge(state, post_state, dace.InterstateEdge())

    # The device containers are copies of the host descriptors that are made
    # transient and moved to FPGA global memory (bank 0 for the input, bank 1
    # for the output).
    _, desc_input_host = sdfg.add_array("a", (n, k, m), dtype)
    _, desc_output_host = sdfg.add_array("b", (n, k, m), dtype)
    desc_input_device = copy.copy(desc_input_host)
    desc_input_device.storage = dace.StorageType.FPGA_Global
    desc_input_device.location["bank"] = 0
    desc_input_device.transient = True
    desc_output_device = copy.copy(desc_output_host)
    desc_output_device.storage = dace.StorageType.FPGA_Global
    desc_output_device.location["bank"] = 1
    desc_output_device.transient = True
    sdfg.add_datadesc("a_device", desc_input_device)
    sdfg.add_datadesc("b_device", desc_output_device)

    # Host to device
    pre_read = pre_state.add_read("a")
    pre_write = pre_state.add_write("a_device")
    pre_state.add_memlet_path(pre_read,
                              pre_write,
                              memlet=dace.Memlet.simple(pre_write,
                                                        "0:N, 0:K, 0:M"))

    # Device to host
    post_read = post_state.add_read("b_device")
    post_write = post_state.add_write("b")
    post_state.add_memlet_path(post_read,
                               post_write,
                               memlet=dace.Memlet.simple(post_write,
                                                         "0:N, 0:K, 0:M"))

    # Compute state
    read_memory = state.add_read("a_device")
    write_memory = state.add_write("b_device")

    # Memory streams
    sdfg.add_stream("a_stream",
                    dtype,
                    storage=dace.StorageType.FPGA_Local,
                    transient=True)
    sdfg.add_stream("b_stream",
                    dtype,
                    storage=dace.StorageType.FPGA_Local,
                    transient=True)
    # Each stream gets one access node on its producing side and one on its
    # consuming side.
    produce_input_stream = state.add_write("a_stream")
    consume_input_stream = state.add_read("a_stream")
    produce_output_stream = state.add_write("b_stream")
    # This node is only read from (it is the source of the copy into
    # b_device), so it is created as a read, like consume_input_stream.
    consume_output_stream = state.add_read("b_stream")

    pipeline_entry, pipeline_exit = state.add_pipeline(
        name, {
            "n": "0:N",
            "k": "0:K",
            "m": "0:M",
        },
        schedule=dace.ScheduleType.FPGA_Device,
        init_size=k * m,
        init_overlap=True,
        drain_size=k * m,
        drain_overlap=True)

    # The tasklet code embeds the pipeline's init/drain condition expressions
    # so that each phase adds a distinct constant.
    init_condition = pipeline_entry.pipeline.init_condition()
    drain_condition = pipeline_entry.pipeline.drain_condition()
    tasklet = state.add_tasklet(
        name, {"_in"}, {"_out"},
        f"_out = _in + (1 if {init_condition} else "
        f"(3 if {drain_condition} else 2))")

    # Container-to-container copies between arrays and streams
    state.add_memlet_path(read_memory,
                          produce_input_stream,
                          memlet=dace.Memlet.simple(read_memory.data,
                                                    "0:N, 0:K, 0:M",
                                                    other_subset_str="0",
                                                    num_accesses=n * k * m))
    state.add_memlet_path(consume_output_stream,
                          write_memory,
                          memlet=dace.Memlet.simple(write_memory.data,
                                                    "0:N, 0:K, 0:M",
                                                    other_subset_str="0",
                                                    num_accesses=n * k * m))

    # Input stream to buffer
    state.add_memlet_path(consume_input_stream,
                          pipeline_entry,
                          tasklet,
                          dst_conn="_in",
                          memlet=dace.Memlet.simple(
                              consume_input_stream.data,
                              "0",
                              num_accesses=-1))

    # Buffer to output stream
    state.add_memlet_path(tasklet,
                          pipeline_exit,
                          produce_output_stream,
                          src_conn="_out",
                          memlet=dace.Memlet.simple(
                              produce_output_stream.data,
                              "0",
                              num_accesses=-1))

    return sdfg
Ejemplo n.º 30
0
s2 = sdfg.add_state()

# Arrays
# s0: copy `inp` into `A` through the 'seta' tasklet.
# NOTE(review): the state-level add_array calls are used here as if they
# return access nodes (see the .data / .desc(sdfg) uses below) -- confirm for
# the DaCe version in use.
inp = s0.add_array('inp', [1], dp.float32)
A = s0.add_array('A', [1], dp.float32)
t = s0.add_tasklet('seta', {'a'}, {'b'}, 'b = a')
s0.add_edge(inp, None, t, 'a', dp.Memlet.from_array(inp.data, inp.desc(sdfg)))
s0.add_edge(t, 'b', A, None, dp.Memlet.from_array(A.data, A.desc(sdfg)))

# s1: reads A and prints an "ok" line (entered when A[0] > 3, see below).
A = s1.add_array('A', [1], dp.float32)
t = s1.add_tasklet('geta', {'a'}, {}, 'printf("ok %f\\n", a + 1)')
s1.add_edge(A, None, t, 'a', dp.Memlet.from_array(A.data, A.desc(sdfg)))

# s2: reads A and prints a "BAD" line (entered when A[0] <= 3, see below).
A = s2.add_array('A', [1], dp.float32)
t = s2.add_tasklet('geta', {'a'}, {}, 'printf("BAD %f\\n", a - 1)')
s2.add_edge(A, None, t, 'a', dp.Memlet.from_array(A.data, A.desc(sdfg)))

# Branch out of s0 on the value of the array element A[0] itself.
sdfg.add_edge(s0, s1, dp.InterstateEdge('A[0] > 3'))
sdfg.add_edge(s0, s2, dp.InterstateEdge('A[0] <= 3'))

if __name__ == '__main__':
    print('Toplevel array usage in interstate edge')
    # np.ndarray allocates without initializing, so each element is set
    # explicitly. With inp == 10, s0 writes 10 into A, which satisfies the
    # 'A[0] > 3' condition leading to s1.
    input = np.ndarray([1], np.float32)
    input[0] = 10
    output = np.ndarray([1], np.float32)
    output[0] = 10

    sdfg(inp=input, A=output)

    exit(0)