Ejemplos de RAM en Python, ejemplos de veriloggen.thread.RAM en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: thread_ram_copy_pattern.py Proyecto: xiaolongn100/veriloggen

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10

    ram0 = vthread.RAM(m, 'ram0', clk, rst, datawidth, addrwidth)
    ram1 = vthread.RAM(m, 'ram1', clk, rst, datawidth, addrwidth)

    def blink(times):
        for i in range(times):
            wdata = i
            ram0.write(i, wdata)
            print('wdata = %d' % wdata)

        # reverse order
        vthread.copy_pattern(ram0, ram1, 0, times - 1, ((times, 1), ),
                             ((times, -1), ))

        sum = 0
        for i in range(times):
            rdata = ram1.read(i)
            sum += rdata
            print('rdata = %d' % rdata)

        print('sum = %d' % sum)

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(10)

    return m

Ejemplo n.º 2

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    size = strm.constant('size')
    sum, sum_valid = strm.ReduceAddValid(a, size)
    strm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_constant('size', size)
        strm.set_sink('sum', ram_b, offset, 1)
        strm.run()
        strm.join()

    def comp_sequential(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            sum += a
        ram_b.write(offset, sum)

    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_b.read(i + offset_stream)
            sq = ram_b.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('OK')
        else:
            print('NG')

    def comp(size):
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_b, offset, 1024, 1)

        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_b, offset, 1024 * 2, 1)

        check(1, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m

Ejemplo n.º 3

0

Mostrar archivo

Archivo: thread_matmul_ipcore.py Proyecto: yinxx/veriloggen

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    led = m.OutputReg('led', 8, initval=0)

    datawidth = 32
    addrwidth = 10

    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    maxi = vthread.AXIM(m, 'maxi', clk, rst, datawidth)
    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth, length=8)

    def matmul():
        while True:
            saxi.wait_flag(0, value=1, resetvalue=0)
            matrix_size = saxi.read(1)
            a_offset = saxi.read(2)
            b_offset = saxi.read(3)
            c_offset = saxi.read(4)
            comp(matrix_size, a_offset, b_offset, c_offset)
            saxi.write_flag(5, 1, resetvalue=0)

    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            maxi.dma_read(ram_a, 0, a_addr, matrix_size)

            b_addr = b_offset
            for j in range(matrix_size):
                maxi.dma_read(ram_b, 0, b_addr, matrix_size)

                sum = 0
                for k in range(matrix_size):
                    x = ram_a.read(k)
                    y = ram_b.read(k)
                    sum += x * y
                ram_c.write(j, sum)

                b_addr += matrix_size * (datawidth // 8)

            maxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    fsm = th.start()

    return m

Ejemplo n.º 4

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10

    ram0 = vthread.RAM(m, 'ram0', clk, rst, datawidth, addrwidth)
    ram1 = vthread.RAM(m, 'ram1', clk, rst, datawidth, addrwidth)

    def blink(times):
        all_ok = True

        write_sum = 0
        for i in range(times):
            wdata = i
            ram0.write(i, wdata)
            write_sum += wdata
            print('wdata = %d' % wdata)

        # reverse order
        vthread.copy_pattern(ram0, ram1, 0, times - 1, ((times, 1), ),
                             ((times, -1), ))

        read_sum = 0
        for i in range(times):
            rdata = ram1.read(i)
            read_sum += rdata
            print('rdata = %d' % rdata)
            # reverse order
            if vthread.verilog.NotEql(rdata, times - i - 1):
                all_ok = False

        print('read_sum = %d' % read_sum)

        if vthread.verilog.NotEql(read_sum, write_sum):
            all_ok = False

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(10)

    return m

Ejemplo n.º 5

0

Mostrar archivo

Archivo: thread_ram.py Proyecto: yinxx/veriloggen

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    def blink(times):
        for i in range(times):
            wdata = i
            myram.write(i, wdata)
            print('wdata = %d' % wdata)

        sum = 0
        for i in range(times):
            rdata = myram.read(i)
            sum += rdata
            print('rdata = %d' % rdata)

        print('sum = %d' % sum)

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(10)

    return m

Ejemplo n.º 6

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    numports = 1
    initvals = [i + 10 for i in range(2 ** addrwidth - 100)]
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth,
                        numports, initvals)

    def blink(times):
        for i in range(times):
            rdata = myram.read(i)
            print('rdata = %d' % rdata)

            wdata = rdata + 1
            myram.write(i, wdata)
            print('wdata = %d' % wdata)

        sum = 0
        for i in range(times):
            rdata = myram.read(i)
            sum += rdata
            print('rdata = %d' % rdata)

        print('sum = %d' % sum)

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(10)

    return m

Ejemplo n.º 7

0

Mostrar archivo

def mkLed(numthreads=8):
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    led = m.OutputReg('LED', 8)

    datawidth = 32
    addrwidth = 10
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    mymutex = vthread.Mutex(m, 'mymutex', clk, rst)

    def myfunc(tid, size):
        mymutex.lock()
        print("Thread %d Lock" % tid)

        for i in range(size):
            read_data = myram.read(i)
            write_data = read_data + tid + i
            myram.write(i, write_data)
            print("Thread %d ram[%d] <- %d" % (tid, i, write_data))

        mymutex.unlock()
        print("Thread %d Unlock" % tid)

    def blink():
        all_ok = True

        size = 16
        for i in range(size):
            myram.write(i, 0)

        for tid in range(numthreads):
            pool.run(tid, tid, size)

        for tid in range(numthreads):
            pool.join(tid)

        for i in range(size):
            read_data = myram.read(i)
            led.value = read_data
            print("result ram[%d] = %d" % (i, read_data))

            expected = i * numthreads + (0 + numthreads - 1) * numthreads // 2
            if vthread.verilog.NotEql(read_data, expected):
                all_ok = False
                print(i, read_data, expected)

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    pool = vthread.ThreadPool(m, 'th_myfunc', clk, rst, myfunc, numthreads)
    fsm = th.start()

    return m

Ejemplo n.º 8

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    read_size = 10
    write_size = read_size

    write_done = m.Reg('write_done', initval=0)
    write_addr = m.Reg('write_addr', addrwidth, initval=0)
    write_data = write_addr
    read_addr = m.Reg('read_addr', addrwidth, initval=0)
    sum = m.Reg('sum', datawidth, initval=0)

    fsm = FSM(m, 'fsm', clk, rst)
    fsm.If(write_done).goto_next()

    # write_rtl
    myram.write_rtl(write_addr, write_data, cond=fsm)
    fsm(write_addr.inc())
    fsm(Display('wdata =  %d', write_data))
    fsm.If(write_addr == write_size - 1).goto_next()

    # read_rtl
    read_data, read_valid = myram.read_rtl(read_addr, cond=fsm)
    fsm.goto_next()

    fsm(read_addr.inc())
    read_data, read_valid = myram.read_rtl(read_addr, cond=fsm)
    fsm.If(read_valid)(Display('rdata =  %d', read_data), sum.add(read_data))
    fsm.If(read_addr == read_size - 1).goto_next()

    fsm.If(read_valid)(Display('rdata =  %d', read_data), sum.add(read_data))
    fsm.goto_next()

    fsm.If(read_valid)(Display('rdata =  %d', read_data), sum.add(read_data))
    fsm.goto_next()

    fsm(Display('sum =  %d', sum))
    fsm.goto_next()

    def blink(times):
        write_done.value = 0
        for i in range(times):
            wdata = i + 100
            myram.write(i, wdata)
            print('wdata = %d' % wdata)
        write_done.value = 1

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(read_size)

    return m

Ejemplo n.º 9

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    numports = 1
    initvals = [Cat(Int(0, width=16), Int(i, width=12), Int(0, width=4))
                for i in range(2 ** addrwidth - 100)]
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth,
                        numports, initvals, nocheck_initvals=True)

    def blink(times):
        all_ok = True

        write_sum = 0
        for i in range(times):
            rdata = myram.read(i)
            print('rdata = %d' % rdata)
            if vthread.verilog.NotEql(rdata, i * 16):
                all_ok = False

            wdata = rdata + 1
            myram.write(i, wdata)
            write_sum += wdata
            print('wdata = %d' % wdata)

        read_sum = 0
        for i in range(times):
            rdata = myram.read(i)
            read_sum += rdata
            print('rdata = %d' % rdata)
            if vthread.verilog.NotEql(rdata, i * 16 + 1):
                all_ok = False

        print('read_sum = %d' % read_sum)

        if vthread.verilog.NotEql(read_sum, write_sum):
            all_ok = False

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(10)

    return m

Ejemplo n.º 10

0

Mostrar archivo

def mkMemcpy():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    led = m.OutputReg('led', 8, initval=0)

    datawidth = 32
    addrwidth = 10

    ram_words = (2**addrwidth) // (datawidth // 8)

    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    maxi = vthread.AXIM(m, 'maxi', clk, rst, datawidth)
    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth, length=8)

    def memcpy():
        while True:
            saxi.wait_flag(0, value=1, resetvalue=0)

            copy_bytes = saxi.read(1)
            src_offset = saxi.read(2)
            dst_offset = saxi.read(3)

            copy(copy_bytes, src_offset, dst_offset)

            saxi.write_flag(4, 1, resetvalue=0)

    def copy(copy_bytes, src_offset, dst_offset):
        rest_words = copy_bytes // (datawidth // 8)
        src_global_addr = src_offset
        dst_global_addr = dst_offset
        local_addr = 0

        while rest_words > 0:
            if rest_words > ram_words:
                dma_size = ram_words
            else:
                dma_size = rest_words

            maxi.dma_read(ram_a, local_addr, src_global_addr, dma_size)
            maxi.dma_write(ram_a, local_addr, dst_global_addr, dma_size)

            src_global_addr += dma_size * (datawidth // 8)
            dst_global_addr += dma_size * (datawidth // 8)
            rest_words -= dma_size

    th = vthread.Thread(m, 'th_memcpy', clk, rst, memcpy)
    fsm = th.start()

    return m

Ejemplo n.º 11

0

Mostrar archivo

Archivo: thread_mutex_ram.py Proyecto: yongfu-li/veriloggen

def mkLed(numthreads=8):
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    led = m.OutputReg('LED', 8)

    datawidth = 32
    addrwidth = 10
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    mymutex = vthread.Mutex(m, 'mymutex', clk, rst)

    def myfunc(tid, size):
        mymutex.lock()
        print("Thread %d Lock" % tid)

        for i in range(size):
            read_data = myram.read(i)
            write_data = read_data + tid + i
            myram.write(i, write_data)
            print("Thread %d ram[%d] <- %d" % (tid, i, write_data))

        mymutex.unlock()
        print("Thread %d Unlock" % tid)

    def blink():
        size = 16
        for i in range(size):
            myram.write(i, 0)

        for tid in range(numthreads):
            pool.run(tid, tid, size)

        for tid in range(numthreads):
            pool.join(tid)

        for i in range(size):
            read_data = myram.read(i)
            led.value = read_data
            print("result ram[%d] = %d" % (i, read_data))

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    pool = vthread.ThreadPool(m, 'th_myfunc', clk, rst, myfunc, numthreads)
    fsm = th.start()

    return m

Ejemplo n.º 12

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth,
                        ram_style='(* ram_style = "block" *)')

    def blink(times):
        all_ok = True

        write_sum = 0
        for i in range(times):
            wdata = i
            myram.write(i, wdata)
            write_sum += wdata
            print('wdata = %d' % wdata)

        read_sum = 0
        for i in range(times):
            rdata = myram.read(i)
            read_sum += rdata
            print('rdata = %d' % rdata)
            if vthread.verilog.NotEql(rdata, i):
                all_ok = False

        print('read_sum = %d' % read_sum)

        if vthread.verilog.NotEql(read_sum, write_sum):
            all_ok = False

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(10)

    return m

Ejemplo n.º 13

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    img_width = strm.parameter('img_width')

    counter = strm.Counter()

    a = strm.source('a')
    a_addr = strm.Counter()
    sp = strm.Scratchpad(a, a_addr, length=128)

    a0 = a
    a1 = a0.prev(1)
    a2 = a1.prev(1)

    a3_addr = a_addr - img_width
    a3 = sp.read(a3_addr)
    a4 = a3.prev(1)
    a5 = a4.prev(1)

    a6_addr = a3_addr - img_width
    a6 = sp.read(a6_addr)
    a7 = a6.prev(1)
    a8 = a7.prev(1)

    #b = a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8
    b = strm.AddN(a0, a1, a2, a3, a4, a5, a6, a7, a8)

    strm.sink(b, 'b', when=counter >= img_width + img_width + 2)

    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size * 3)
        strm.set_sink('b', ram_b, offset, size - 2)
        strm.set_parameter('img_width', size)
        strm.run()
        strm.join()

    def comp_sequential(size, offset):
        for i in range(size - 2):
            a0 = ram_a.read(i + offset)
            a1 = ram_a.read(i + offset + 1)
            a2 = ram_a.read(i + offset + 2)
            a3 = ram_a.read(i + offset + size)
            a4 = ram_a.read(i + offset + size + 1)
            a5 = ram_a.read(i + offset + size + 2)
            a6 = ram_a.read(i + offset + size + size)
            a7 = ram_a.read(i + offset + size + size + 1)
            a8 = ram_a.read(i + offset + size + size + 2)
            b = a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8
            ram_b.write(i + offset, b)

    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size - 2):
            st = ram_b.read(i + offset_stream)
            sq = ram_b.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size * 3)
        comp_stream(size, offset)
        myaxi.dma_write(ram_b, offset, 1024, size)

        # sequential
        offset = size * 4
        myaxi.dma_read(ram_a, offset, 0, size * 3)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_b, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m

Ejemplo n.º 14

0

Mostrar archivo

def mkLed(memory_datawidth=128):
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, memory_datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    all_ok = m.TmpReg(initval=0)

    def blink(size):
        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            # Test for 4KB boundary check
            offset = i * 1024 * 16 + (myaxi.boundary_size -
                                      memory_datawidth // 8)
            body(size, offset)
            print('# iter %d end' % i)

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

        vthread.finish()

    def body(size, offset):
        # write
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        for i in range(size):
            wdata = i + 1000
            myram.write(i, wdata)

        laddr = 0
        gaddr = (size + size) * 4 + offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 100):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

        # read
        laddr = 0
        gaddr = (size + size) * 4 + offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 1000):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(17)

    return m

Ejemplo n.º 15

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    shape = [16, 4, 8]
    size = functools.reduce(lambda x, y: x * y, shape, 1)
    order = [1, 2, 0]

    def to_pattern(shape, order):
        pattern = []
        for p in order:
            size = shape[p]
            stride = functools.reduce(lambda x, y: x * y, shape[p + 1:], 1)
            pattern.append((size, stride))
        return pattern

    pattern_a = to_pattern(shape, order)
    pattern_b = to_pattern(shape, order)
    pattern_c = to_pattern(shape, order)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a + b
    strm.sink(c, 'c')

    def comp_stream(offset):
        strm.set_source_pattern('a', ram_a, offset, pattern_a)
        strm.set_source_pattern('b', ram_b, offset, pattern_b)
        strm.set_sink_pattern('c', ram_c, offset, pattern_c)
        strm.run()
        strm.join()

    def comp_sequential(offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b
            ram_c.write(i + offset, sum)

    def check(offset_stream, offset_seq):
        all_ok = True
        st = ram_c.read(offset_stream)
        sq = ram_c.read(offset_seq)
        if vthread.verilog.NotEql(st, sq):
            all_ok = False

        if all_ok:
            print('OK')
        else:
            print('NG')

    def comp():
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 4, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 8, 1)

        # verification
        check(0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start()

    return m

Ejemplo n.º 16

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    cnt1 = strm.Counter()
    cnt2 = strm.Counter(initval=1)
    cnt3 = strm.Counter(initval=2, size=5)
    cnt4 = strm.Counter(initval=3, interval=3)
    cnt5 = strm.Counter(initval=4, interval=3, size=7)
    cnt6 = strm.Counter(initval=4, step=2, interval=2)
    a = strm.source('a')
    b = strm.source('b')
    c = a + b - a - b + cnt1 + cnt2 + cnt3 + cnt4 + cnt5 + cnt6
    strm.sink(c, 'c')

    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    def comp_sequential(size, offset):
        cnt = 0
        for i in range(size):
            cnt1 = cnt
            cnt2 = 1 + cnt
            cnt3 = (cnt + 2) % 5
            cnt4 = (cnt // 3) + 3
            cnt5 = ((cnt // 3) + 4) % 7
            cnt6 = (cnt // 2) * 2 + 4
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b - a - b + cnt1 + cnt2 + cnt3 + cnt4 + cnt5 + cnt6
            ram_c.write(i + offset, sum)
            cnt += 1

    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m

Ejemplo n.º 17

0

Mostrar archivo

Archivo: thread_to_multibank_ram.py Proyecto: tanbour/veriloggen

def mkLed(memory_datawidth=128):
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    numbanks = 4
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, memory_datawidth)

    myrams = [vthread.RAM(m, 'myram_%d' % i, clk, rst, datawidth, addrwidth)
              for i in range(numbanks)]
    myram = vthread.MultibankRAM(rams=myrams, name='myram')

    all_ok = m.TmpReg(initval=0)

    array_len = 16
    array_size = (array_len + array_len) * 4 * numbanks

    def blink(size):
        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            # Test for 4KB boundary check
            offset = i * 1024 * 16 + (myaxi.boundary_size - 4)
            body(size, offset)
            print('# iter %d end' % i)

        if all_ok:
            print('ALL OK')

    def body(size, offset):
        # write
        for bank in range(numbanks):
            for i in range(size):
                wdata = i + 100 + bank
                myram.write_bank(bank, i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        for bank in range(numbanks):
            for i in range(size):
                wdata = i + 1000 + bank
                myram.write_bank(bank, i, wdata)

        laddr = 0
        gaddr = array_size + offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for bank in range(numbanks):
            for i in range(size):
                rdata = myram.read_bank(bank, i)
                if vthread.verilog.NotEql(rdata, i + 100 + bank):
                    print('rdata[%d] = %d' % (i, rdata))
                    all_ok.value = False

        # read
        laddr = 0
        gaddr = array_size + offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for bank in range(numbanks):
            for i in range(size):
                rdata = myram.read_bank(bank, i)
                if vthread.verilog.NotEql(rdata, i + 1000 + bank):
                    print('rdata[%d] = %d' % (i, rdata))
                    all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(array_len)

    return m

Ejemplo n.º 18

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    size = strm.constant('size')
    cnt, valid = strm.CounterValid(size)
    a = strm.source('a')
    b = strm.source('b')
    cntval = strm.Mux(valid, 1000, cnt)
    c = a + b + cntval
    strm.sink(c, 'c')

    def comp_stream(size, offset):
        strm.set_constant('size', size // 2)
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    def comp_sequential(size, offset):
        sum = 0
        cnt = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b + cnt
            cnt += 1
            if cnt == 1001:
                cnt = 0
            if cnt == size // 2 - 1:
                cnt = 1000
            ram_c.write(i + offset, sum)

    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m

Ejemplo n.º 19

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    mulstrm = vthread.Stream(m, 'mul_stream', clk, rst)
    mulx = mulstrm.source('x')
    muly = mulstrm.source('y')
    mulz = mulx * muly
    mulstrm.sink(mulz, 'z')

    macstrm = vthread.Stream(m, 'mac_stream', clk, rst)
    a = macstrm.source('a')
    b = macstrm.source('b')
    a = a + 1
    b = b + 1
    sub = macstrm.substream(mulstrm)
    sub.to_source('x', a)
    sub.to_source('y', b)
    c = sub.from_sink('z')
    size = macstrm.constant('size')
    sum, sum_valid = macstrm.ReduceAddValid(c, size)
    macstrm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    actstrm = vthread.Stream(m, 'act_stream', clk, rst)
    a = actstrm.source('a')
    b = actstrm.source('b')
    a = a + 1
    b = b + 1
    a = a + 1
    b = b + 1
    sub = actstrm.substream(mulstrm)
    sub.to_source('x', a)
    sub.to_source('y', b)
    c = sub.from_sink('z')
    size = actstrm.constant('size')
    sum, sum_valid = actstrm.ReduceAddValid(c, size)
    sum = actstrm.Mux(sum > 0, sum, 0)
    actstrm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    def comp_stream_mul(size, offset):
        mulstrm.set_source('x', ram_a, offset, size)
        mulstrm.set_source('y', ram_b, offset, size)
        mulstrm.set_sink('z', ram_c, offset, size)
        mulstrm.run()
        mulstrm.join()

    def comp_stream_mac(size, offset):
        macstrm.set_source('a', ram_a, offset, size)
        macstrm.set_source('b', ram_b, offset, size)
        macstrm.set_constant('size', size)
        macstrm.set_sink('sum', ram_c, offset, 1)
        macstrm.run()
        macstrm.join()

    def comp_stream_act(size, offset):
        actstrm.set_source('a', ram_a, offset, size)
        actstrm.set_source('b', ram_b, offset, size)
        actstrm.set_constant('size', size)
        actstrm.set_sink('sum', ram_c, offset, 1)
        actstrm.run()
        actstrm.join()

    def comp_sequential_mul(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a * b
            ram_c.write(i + offset, sum)

    def comp_sequential_mac(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset) + 1
            b = ram_b.read(i + offset) + 1
            sum += a * b
        ram_c.write(offset, sum)

    def comp_sequential_act(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset) + 2
            b = ram_b.read(i + offset) + 2
            sum += a * b
        if sum <= 0:
            sum = 0
        ram_c.write(offset, sum)

    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
                print(i, st, sq)
        if all_ok:
            print('OK')
        else:
            print('NG')

    def comp(size):
        # mul
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_mul(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_mul(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# MUL')
        check(size, 0, offset)

        # mac
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# MAC')
        check(1, 0, offset)

        # act
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# ACT')
        check(1, 0, offset)

        # mac 2
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# MAC')
        check(1, 0, offset)

        # act 2
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# ACT')
        check(1, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m

Ejemplo n.º 20

0

Mostrar archivo

def mkLed(matrix_size=16):
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    seq = Seq(m, 'seq', clk, rst)
    timer = m.Reg('timer', 32, initval=0)
    seq(timer.inc())

    datawidth = 32
    addrwidth = 10
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)

    def matmul(matrix_size, a_offset, b_offset, c_offset):
        start_time = timer
        comp(matrix_size, a_offset, b_offset, c_offset)
        end_time = timer
        time = end_time - start_time
        print("Time (cycles): %d" % time)
        check(matrix_size, a_offset, b_offset, c_offset)

    def strm_madd(strm, size, waddr):
        a = strm.read(ram_a, 0, size)
        b = strm.read(ram_b, 0, size)
        sum, valid = strm.RegionAdd(a * b, size)
        strm.write(ram_c, waddr, 1, sum, when=valid)

    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            myaxi.dma_read(ram_a, 0, a_addr, matrix_size)

            b_addr = b_offset
            for j in range(matrix_size):
                myaxi.dma_read(ram_b, 0, b_addr, matrix_size)

                stream.run(matrix_size, j)
                stream.join()

                b_addr += matrix_size * (datawidth // 8)

            myaxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    def check(matrix_size, a_offset, b_offset, c_offset):
        all_ok = True
        c_addr = c_offset
        for i in range(matrix_size):
            myaxi.dma_read(ram_c, 0, c_addr, matrix_size)
            for j in range(matrix_size):
                v = ram_c.read(j)
                if i == j and vthread.verilog.NotEql(v, (i + 1) * 2):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
                if i != j and vthread.verilog.NotEql(v, 0):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
            c_addr += matrix_size * (datawidth // 8)

        if all_ok:
            print("OK")
        else:
            print("NG")

    stream = vthread.Stream(m, 'strm_madd', clk, rst, strm_madd)
    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    fsm = th.start(matrix_size, 0, 1024, 2048)

    return m

Ejemplo n.º 21

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a + b
    v = strm.Ands(c > 140, c < 150)
    cnt = strm.ReduceAdd(v)
    strm.sink(c, 'c', when=v, when_name='v')
    strm.sink(cnt, 'cnt')

    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, 0)  # max_size
        strm.set_sink_immediate('cnt', 0)  # max_size
        strm.run()
        strm.join()
        cnt = strm.read_sink('cnt')
        print('# num of counted: %d' % cnt)
        return cnt

    def comp_sequential(size, offset):
        sum = 0
        addr = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            c = a + b
            if c > 140 and c < 150:
                ram_c.write(addr + offset, c)
                addr += 1
        print('# num of counted: %d' % addr)
        return addr

    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        cnt = comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, cnt)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        cnt = comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, cnt)

        # verification
        myaxi.dma_read(ram_c, 0, 1024, cnt)
        myaxi.dma_read(ram_c, offset, 1024 * 2, cnt)
        check(cnt, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m

Ejemplo n.º 22

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    all_ok = m.TmpReg(initval=0)

    def blink(size):
        all_ok.value = True

        # Test for 4KB boundary check
        offset = myaxi.boundary_size - 4
        body(size, offset)

        if all_ok:
            print('ALL OK')

    def body(size, offset):
        # write
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        for i in range(size):
            wdata = i + 1000
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset + myaxi.boundary_size
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 100):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

        # read
        laddr = 0
        gaddr = offset + myaxi.boundary_size
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 1000):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(256 + 256 + 64)

    return m

Ejemplo n.º 23

0

Mostrar archivo

Archivo: thread_stream_substream_reduce.py Proyecto: tanbour/veriloggen

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    reduce_size = 4

    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    ram_d = vthread.RAM(m, 'ram_d', clk, rst, datawidth, addrwidth)

    macstrm = vthread.Stream(m, 'macstream', clk, rst)
    macstrm_a = macstrm.source('a')
    macstrm_b = macstrm.source('b')
    macstrm_const = macstrm.constant('const')
    macstrm_mul = macstrm_a * macstrm_b
    macstrm_c, macstrm_v = macstrm.ReduceAddValid(macstrm_mul, macstrm_const)
    macstrm_v += 0
    macstrm.sink(macstrm_c, 'c')
    macstrm.sink(macstrm_v, 'v')

    strm = vthread.Stream(m, 'mystream', clk, rst)
    x = strm.source('x')
    y = strm.source('y')
    const = strm.constant('const')
    sub = strm.substream(macstrm)
    sub.to_source('a', x)
    sub.to_source('b', y)
    sub.to_constant('const', const)
    z = sub.from_sink('c')
    v = sub.from_sink('v')
    z = z + x
    strm.sink(z, 'z', when=v, when_name='v')

    def comp_stream_macstrm(size, offset):
        macstrm.set_source('a', ram_a, offset, size)
        macstrm.set_source('b', ram_b, offset, size)
        macstrm.set_constant('const', reduce_size)
        macstrm.set_sink('c', ram_c, offset, size)
        macstrm.set_sink('v', ram_d, offset, size)
        macstrm.run()
        macstrm.join()

    def comp_stream_mystrm(size, offset):
        strm.set_source('x', ram_a, offset, size)
        strm.set_source('y', ram_b, offset, size)
        strm.set_constant('const', reduce_size)
        strm.set_sink('z', ram_c, offset, size // reduce_size)
        strm.run()
        strm.join()

    def comp_sequential_macstrm(size, offset):
        sum = 0
        count = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum += a * b
            count += 1
            ram_c.write(i + offset, sum)
            ram_d.write(i + offset, count == (reduce_size - 1))
            if count == reduce_size:
                sum = 0
                count = 0

    def comp_sequential_mystrm(size, offset):
        sum = 0
        count = 0
        write_offset = offset
        for i in range(size):
            x = ram_a.read(i + offset)
            y = ram_b.read(i + offset)
            sum += x * y
            val = sum + x
            count += 1
            if count == reduce_size:
                ram_c.write(write_offset, val)
                write_offset += 1
                sum = 0
                count = 0

    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
                print(i, st, sq)
        if all_ok:
            print('OK')
        else:
            print('NG')

    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# macstream')
        check(size, 0, offset)

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size // reduce_size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size // reduce_size)

        # verification
        print('# mystream')
        check(size // reduce_size, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(16)

    return m

Ejemplo n.º 24

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    reduce_size = 4

    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    ram_d = vthread.RAM(m, 'ram_d', clk, rst, datawidth, addrwidth)

    macstrm = vthread.Stream(m, 'macstream', clk, rst)
    macstrm_a = macstrm.source('a')
    macstrm_b = macstrm.source('b')
    macstrm_const = macstrm.parameter('const')
    macstrm_mul = macstrm_a * macstrm_b
    macstrm_c, macstrm_v = macstrm.ReduceAddValid(macstrm_mul, macstrm_const)
    macstrm.sink(macstrm_c, 'c')
    macstrm.sink(macstrm_v, 'v')

    macstrm2 = vthread.Stream(m, 'macstream2', clk, rst)
    macstrm2_a = macstrm2.source('a')
    macstrm2_b = macstrm2.source('b')
    macstrm2_const = macstrm2.parameter('const')
    macstrm2_a = macstrm2_a + 1
    macstrm2_a = macstrm2_a - 1
    macstrm2_b = macstrm2_b * 1
    macsub = macstrm2.substream(macstrm)
    macsub.to_source('a', macstrm2_a)
    macsub.to_source('b', macstrm2_b)
    macsub.to_parameter('const', macstrm2_const)
    macstrm2_c = macsub.from_sink('c')
    macstrm2_v = macsub.from_sink('v')
    macstrm2.sink(macstrm2_c, 'c')
    macstrm2.sink(macstrm2_v, 'v')

    neststrm = vthread.Stream(m, 'neststream', clk, rst)
    neststrm_a = neststrm.source('a')
    neststrm_b = neststrm.source('b')
    neststrm_const = neststrm.parameter('const')
    neststrm_a += 1
    neststrm_a += 0
    neststrm_b += 1
    macsub = neststrm.substream(macstrm2)
    macsub.to_source('a', neststrm_a)
    macsub.to_source('b', neststrm_b)
    macsub.to_parameter('const', neststrm_const)
    neststrm_c = macsub.from_sink('c')
    neststrm_c += neststrm_a
    neststrm_c += 0
    neststrm_v = macsub.from_sink('v')
    neststrm.sink(neststrm_c, 'c')
    neststrm.sink(neststrm_v, 'v')

    strm = vthread.Stream(m, 'mystream', clk, rst)
    x = strm.source('x')
    y = strm.source('y')
    const = strm.parameter('const')
    sub = strm.substream(neststrm)
    sub.to_source('a', x)
    sub.to_source('b', y)
    sub.to_parameter('const', const)
    z = sub.from_sink('c')
    v = sub.from_sink('v')
    z = z + y
    strm.sink(z, 'z', when=v, when_name='v')

    all_ok = m.TmpReg(initval=0)

    def comp_stream_macstrm(size, offset):
        macstrm2.set_source('a', ram_a, offset, size)
        macstrm2.set_source('b', ram_b, offset, size)
        macstrm2.set_parameter('const', reduce_size)
        macstrm2.set_sink('c', ram_c, offset, size)
        macstrm2.set_sink('v', ram_d, offset, size)
        macstrm2.run()
        macstrm2.join()

    def comp_stream_mystrm(size, offset):
        strm.set_source('x', ram_a, offset, size)
        strm.set_source('y', ram_b, offset, size)
        strm.set_parameter('const', reduce_size)
        strm.set_sink('z', ram_c, offset, size // reduce_size)
        strm.run()
        strm.join()

    def comp_sequential_macstrm(size, offset):
        sum = 0
        count = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum += a * b
            count += 1
            ram_c.write(i + offset, sum)
            ram_d.write(i + offset, count == (reduce_size - 1))
            if count == reduce_size:
                sum = 0
                count = 0

    def comp_sequential_mystrm(size, offset):
        sum = 0
        count = 0
        write_offset = offset
        for i in range(size):
            x = ram_a.read(i + offset)
            y = ram_b.read(i + offset)
            sum += (x + 1) * (y + 1)
            val = sum + (x + 1) + y
            count += 1
            if count == reduce_size:
                ram_c.write(write_offset, val)
                write_offset += 1
                sum = 0
                count = 0

    def check(size, offset_stream, offset_seq):
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok.value = False
                print(i, st, sq)
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp(size):
        all_ok.value = True

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# macstream')
        check(size, 0, offset)

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size // reduce_size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size // reduce_size)

        # verification
        print('# mystream')
        check(size // reduce_size, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(16)

    return m

Ejemplo n.º 25

0

Mostrar archivo

def mkLed(matrix_size=16):
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    seq = Seq(m, 'seq', clk, rst)
    timer = m.Reg('timer', 32, initval=0)
    seq(
        timer.inc()
    )

    addrwidth = 10
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)

    strm = vthread.Stream(m, 'strm_madd', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    size = strm.constant('size')
    sum, sum_valid = strm.ReduceAddValid(a * b, size)
    strm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    def strm_madd(size, waddr):
        strm.set_source('a', ram_a, 0, size)
        strm.set_source('b', ram_b, 0, size)
        strm.set_constant('size', size)
        strm.set_sink('sum', ram_c, waddr, 1)
        strm.run()
        strm.join()

    def matmul(matrix_size, a_offset, b_offset, c_offset):
        start_time = timer
        comp(matrix_size, a_offset, b_offset, c_offset)
        end_time = timer
        time = end_time - start_time
        print("Time (cycles): %d" % time)
        check(matrix_size, a_offset, b_offset, c_offset)
        vthread.finish()

    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            myaxi.dma_read(ram_a, 0, a_addr, matrix_size)

            b_addr = b_offset
            for j in range(matrix_size):
                myaxi.dma_read(ram_b, 0, b_addr, matrix_size)

                strm_madd(matrix_size, j)

                b_addr += matrix_size * (datawidth // 8)

            myaxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    def check(matrix_size, a_offset, b_offset, c_offset):
        all_ok = True
        c_addr = c_offset
        for i in range(matrix_size):
            myaxi.dma_read(ram_c, 0, c_addr, matrix_size)
            for j in range(matrix_size):
                v = ram_c.read(j)
                if i == j and vthread.verilog.NotEql(v, (i + 1) * 2):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
                if i != j and vthread.verilog.NotEql(v, 0):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
            c_addr += matrix_size * (datawidth // 8)

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    fsm = th.start(matrix_size, a_offset, b_offset, c_offset)

    return m

Ejemplo n.º 26

0

Mostrar archivo

def mkLed(word_datawidth=128):
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10

    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m,
                        'myram',
                        clk,
                        rst,
                        word_datawidth,
                        addrwidth,
                        numports=2)

    axi_in = vthread.AXIStreamInFifo(m,
                                     'axi_in',
                                     clk,
                                     rst,
                                     datawidth,
                                     with_last=True,
                                     noio=True)
    axi_out = vthread.AXIStreamOutFifo(m,
                                       'axi_out',
                                       clk,
                                       rst,
                                       datawidth,
                                       with_last=True,
                                       noio=True)

    maxi_in = vthread.AXIM_for_AXIStreamIn(axi_in, 'maxi_in')
    maxi_out = vthread.AXIM_for_AXIStreamOut(axi_out, 'maxi_out')

    fifo_addrwidth = 8
    fifo_in = vthread.FIFO(m, 'fifo_in', clk, rst, word_datawidth,
                           fifo_addrwidth)
    fifo_out = vthread.FIFO(m, 'fifo_out', clk, rst, word_datawidth,
                            fifo_addrwidth)

    all_ok = m.TmpReg(initval=0)

    def blink(size):
        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            # Test for 4KB boundary check
            offset = i * 1024 * 16 + (myaxi.boundary_size -
                                      (word_datawidth // 8))
            body(size, offset)
            print('# iter %d end' % i)

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

        vthread.finish()

    def body(size, offset):
        # write a test vector
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size, port=1)

        # AXI-stream read -> FIFO -> FIFO -> AXI-stream write
        maxi_in.dma_read_async(gaddr, size * (word_datawidth // datawidth))
        axi_in.write_fifo(fifo_in, size)

        for i in range(size):
            va = fifo_in.deq()
            fifo_out.enq(va)

        out_gaddr = (size + size) * (word_datawidth // 8) + offset
        maxi_out.dma_write_async(out_gaddr,
                                 size * (word_datawidth // datawidth))
        axi_out.read_fifo(fifo_out, size)

        # check
        myaxi.dma_read(myram, 0, gaddr, size, port=1)
        myaxi.dma_read(myram, size, out_gaddr, size, port=1)

        for i in range(size):
            v0 = myram.read(i)
            v1 = myram.read(i + size)
            if vthread.verilog.NotEql(v0, v1):
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(17)

    return m

Ejemplo n.º 27

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    img_width = strm.parameter('img_width')

    counter = strm.Counter()

    a = strm.source('a')
    a_addr = strm.Counter()

    sp = strm.Scratchpad(a, a_addr, length=128)

    a_old_addr = strm.Counter() - img_width
    a_old = sp.read(a_old_addr)

    b = a + a_old

    strm.sink(b, 'b', when=counter >= img_width)

    # add a stall condition
    count = m.Reg('count', 4, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(count.inc())

    util.add_disable_cond(strm.oready, 1, count == 0)

    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size * 2)
        strm.set_sink('b', ram_b, offset, size)
        strm.set_parameter('img_width', size)
        strm.run()
        strm.join()

    def comp_sequential(size, offset):
        for i in range(size):
            a_buf = ram_a.read(i + offset)
            a = ram_a.read(i + offset + size)
            b = a_buf + a
            ram_b.write(i + offset, b)

    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_b.read(i + offset_stream)
            sq = ram_b.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size * 2)
        comp_stream(size, offset)
        myaxi.dma_write(ram_b, offset, 1024, size)

        # sequential
        offset = size * 4
        myaxi.dma_read(ram_a, offset, 0, size * 2)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_b, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m

Ejemplo n.º 28

0

Mostrar archivo

Archivo: thread_ram_dma_long.py Proyecto: howardyan93/veriloggen

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    def blink():
        size = 256 * 2
        offset = 1024 * 4

        # write
        for i in range(size):
            wdata = i
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myram.dma_write(myaxi, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # overwrite
        for i in range(size):
            wdata = 128
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset + size * 4
        myram.dma_write(myaxi, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        all_ok = True

        laddr = 0
        gaddr = offset
        myram.dma_read(myaxi, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if rdata != i:
                print('rdata[%d] = %d' % (i, rdata))
                all_ok = False

        # read
        laddr = 0
        gaddr = offset + size * 4
        myram.dma_read(myaxi, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if rdata != 128:
                print('rdata[%d] = %d' % (i, rdata))
                all_ok = False

        if all_ok:
            print('ALL OK')

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start()

    return m

Ejemplo n.º 29

0

Mostrar archivo

Archivo: thread_stream_pattern_zero.py Proyecto: yinxx/veriloggen

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    size = 16
    pattern = [(size, 0)]

    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    sum = a + b
    strm.sink(sum, 'sum')

    def comp_stream(offset):
        strm.set_source_pattern('a', ram_a, offset + 10, pattern)
        strm.set_source_pattern('b', ram_b, offset + 10, pattern)
        strm.set_sink('sum', ram_c, offset, size)
        strm.run()
        strm.join()

    def comp_sequential(offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(offset + 10)
            b = ram_b.read(offset + 10)
            sum = a + b
            ram_c.write(i + offset, sum)

    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp():
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 4, 1)

        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 8, 1)

        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start()

    return m

Ejemplo n.º 30

0

Mostrar archivo

def mkLed():
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a * b
    strm.sink(c, 'c')

    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()

        # double buffer of comp and cmd
        strm.set_source('a', ram_a, offset + size, size)
        strm.set_source('b', ram_b, offset + size, size)
        strm.set_sink('c', ram_c, offset + size, size)
        strm.source_join()

        strm.run()

        # double buffer of comp and cmd
        strm.set_source('a', ram_a, offset + size + size, size)
        strm.set_source('b', ram_b, offset + size + size, size)
        strm.set_sink('c', ram_c, offset + size + size, size)
        strm.source_join()

        strm.run()

        strm.source_join()
        strm.join()

    def comp_sequential(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a * b
            ram_c.write(i + offset, sum)

    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp(size):
        new_size = size + size + size
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, new_size)
        myaxi.dma_read(ram_b, offset, 512, new_size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, new_size)

        # sequential
        offset = new_size
        myaxi.dma_read(ram_a, offset, 0, new_size)
        myaxi.dma_read(ram_b, offset, 512, new_size)
        comp_sequential(new_size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, new_size)

        # verification
        myaxi.dma_read(ram_c, 0, 1024, new_size)
        myaxi.dma_read(ram_c, offset, 1024 * 2, new_size)
        check(new_size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m