Example #1
0
def mkLed():
    """Build a 'blinkled' module that copies axi_a to axi_b, adding 1 per word.

    The module gets CLK/RST inputs, an ``AXIStreamIn`` named 'axi_a', an
    ``AXIStreamOut`` named 'axi_b' (both created with ``with_last=True``) and
    an ``AXISLiteRegister`` named 'saxi'. A vthread ``Thread`` called
    'th_comp' is created from the nested ``comp`` function and started.

    Register usage visible in ``comp``:
        0 -- flag waited on before each run
        1 -- busy indicator (written 1 at start, 0 at end)
        2 -- number of stream words to process

    Returns:
        The ``Module`` instance that all of the above were attached to.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # Width passed to both stream interfaces and to the register block.
    datawidth = 32

    axi_a = vthread.AXIStreamIn(m, 'axi_a', clk, rst, datawidth, with_last=True)
    axi_b = vthread.AXIStreamOut(m, 'axi_b', clk, rst, datawidth, with_last=True)

    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth)

    # Thread body handed to vthread.Thread below.
    def comp():

        while True:
            # Block on register 0 until it matches value=1
            # (presumably the flag is then reset to 0 -- confirm against
            # the AXISLiteRegister.wait_flag documentation).
            saxi.wait_flag(0, value=1, resetvalue=0)
            saxi.write(1, 1)  # set busy
            size = saxi.read(2)

            for i in range(size):
                # Each read yields a data word and its accompanying 'last' value.
                a, a_last = axi_a.read()
                b = a + 1
                # The 'last' value is forwarded unchanged to the output stream.
                b_last = a_last
                axi_b.write(b, b_last)

            saxi.write(1, 0)  # unset busy

        # Follows a `while True` loop that contains no break, so ordinary
        # Python control flow never gets here.
        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # The value returned by start() is bound but not used in this function.
    fsm = th.start()

    return m
Example #2
0
def mkLed():
    """Build an 'add' module that sums two AXI-Lite registers on request.

    The module gets CLK/RST inputs and an ``AXISLiteRegister`` named 'saxi'
    created with ``datawidth=32`` and ``length=8``. A vthread ``Thread``
    called 'th_add' runs the nested ``add`` function.

    Register usage visible in ``add``:
        0 -- flag waited on before each addition
        1 -- flag written with 1 after the result is stored
        2, 3 -- the two operands
        4 -- the sum

    Returns:
        The ``Module`` instance that was populated.
    """
    m = Module('add')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    saxi = vthread.AXISLiteRegister(m,
                                    'saxi',
                                    clk,
                                    rst,
                                    datawidth=32,
                                    length=8)

    # Thread body: loops forever, performing one addition per request.
    def add():
        while True:
            # Block on register 0 until it matches value=1.
            saxi.wait_flag(0, value=1, resetvalue=0)

            a = saxi.read(2)
            b = saxi.read(3)
            c = a + b

            # Store the result first, then raise the flag in register 1.
            saxi.write(4, c)
            saxi.write_flag(1, 1, resetvalue=0)

    th = vthread.Thread(m, 'th_add', clk, rst, add)
    # The value returned by start() is bound but not used in this function.
    fsm = th.start()

    return m
Example #3
0
def mkLed():
    """Build a 'blinkled' module that multiplies matrices fetched over AXI.

    The module gets CLK/RST inputs, an 8-bit output register 'led', three
    RAMs ('ram_a', 'ram_b', 'ram_c'), an ``AXIM`` named 'maxi' and an
    ``AXISLiteRegister`` named 'saxi' with ``length=8``. A vthread ``Thread``
    called 'th_matmul' runs the nested ``matmul`` function, which delegates
    the arithmetic to ``comp``.

    Register usage visible in ``matmul``:
        0 -- flag waited on before each run
        1 -- matrix_size
        2, 3, 4 -- global offsets for A, B and C
        5 -- flag written with 1 when the run is finished

    Returns:
        The ``Module`` instance that was populated.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    # Declared as a module output; nothing in this function assigns to it.
    led = m.OutputReg('led', 8, initval=0)

    datawidth = 32
    addrwidth = 10

    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    maxi = vthread.AXIM(m, 'maxi', clk, rst, datawidth)
    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth, length=8)

    # Thread entry point: reads the job description from saxi and runs comp.
    def matmul():
        while True:
            saxi.wait_flag(0, value=1, resetvalue=0)
            matrix_size = saxi.read(1)
            a_offset = saxi.read(2)
            b_offset = saxi.read(3)
            c_offset = saxi.read(4)
            comp(matrix_size, a_offset, b_offset, c_offset)
            saxi.write_flag(5, 1, resetvalue=0)

    # For every i: load matrix_size words of A, then for every j load
    # matrix_size words of B, accumulate the element-wise products into
    # ram_c[j], and finally write matrix_size words of ram_c back out.
    # NOTE(review): B is consumed in contiguous matrix_size-word chunks, so
    # this yields A x B only if B is stored transposed -- confirm with the
    # testbench that fills memory.
    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            maxi.dma_read(ram_a, 0, a_addr, matrix_size)

            # B is re-read from its beginning for every chunk of A.
            b_addr = b_offset
            for j in range(matrix_size):
                maxi.dma_read(ram_b, 0, b_addr, matrix_size)

                sum = 0
                for k in range(matrix_size):
                    x = ram_a.read(k)
                    y = ram_b.read(k)
                    sum += x * y
                ram_c.write(j, sum)

                # Global addresses advance by matrix_size words, expressed
                # in bytes (datawidth // 8 bytes per word).
                b_addr += matrix_size * (datawidth // 8)

            maxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    # The value returned by start() is bound but not used in this function.
    fsm = th.start()

    return m
Example #4
0
def mkMemcpy():
    """Build a 'blinkled' module that copies a memory region via a local RAM.

    The module gets CLK/RST inputs, an 8-bit output register 'led', one RAM
    ('ram_a'), an ``AXIM`` named 'maxi' and an ``AXISLiteRegister`` named
    'saxi' with ``length=8``. A vthread ``Thread`` called 'th_memcpy' runs
    the nested ``memcpy`` function, which delegates the transfer to ``copy``.

    Register usage visible in ``memcpy``:
        0 -- flag waited on before each copy
        1 -- number of bytes to copy
        2 -- source offset
        3 -- destination offset
        4 -- flag written with 1 when the copy is finished

    Returns:
        The ``Module`` instance that was populated.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    # Declared as a module output; nothing in this function assigns to it.
    led = m.OutputReg('led', 8, initval=0)

    datawidth = 32
    addrwidth = 10

    # Upper bound on the number of words moved per DMA read/write pair
    # (with the values above: 1024 // 4 == 256).
    ram_words = (2**addrwidth) // (datawidth // 8)

    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    maxi = vthread.AXIM(m, 'maxi', clk, rst, datawidth)
    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth, length=8)

    # Thread entry point: reads the job description from saxi and runs copy.
    def memcpy():
        while True:
            saxi.wait_flag(0, value=1, resetvalue=0)

            copy_bytes = saxi.read(1)
            src_offset = saxi.read(2)
            dst_offset = saxi.read(3)

            copy(copy_bytes, src_offset, dst_offset)

            saxi.write_flag(4, 1, resetvalue=0)

    # Moves copy_bytes (rounded down to whole words) from src_offset to
    # dst_offset in chunks of at most ram_words words, staging each chunk
    # in ram_a.
    def copy(copy_bytes, src_offset, dst_offset):
        rest_words = copy_bytes // (datawidth // 8)
        src_global_addr = src_offset
        dst_global_addr = dst_offset
        # Every chunk is staged at the same local address; this is never
        # modified below.
        local_addr = 0

        while rest_words > 0:
            # Clamp the chunk size to ram_words.
            if rest_words > ram_words:
                dma_size = ram_words
            else:
                dma_size = rest_words

            maxi.dma_read(ram_a, local_addr, src_global_addr, dma_size)
            maxi.dma_write(ram_a, local_addr, dst_global_addr, dma_size)

            # Global addresses advance in bytes; the remaining count is in words.
            src_global_addr += dma_size * (datawidth // 8)
            dst_global_addr += dma_size * (datawidth // 8)
            rest_words -= dma_size

    th = vthread.Thread(m, 'th_memcpy', clk, rst, memcpy)
    # The value returned by start() is bound but not used in this function.
    fsm = th.start()

    return m
Example #5
0
def mkLed():
    """Build a 'blinkled' module whose LED counter is driven via AXI-Lite.

    The module gets CLK/RST inputs, an 8-bit output register 'led' and an
    ``AXISLiteRegister`` named 'saxi'. A vthread ``Thread`` called 'th_blink'
    runs the nested ``blink`` function; the nested ``wait`` function is
    registered on the thread through ``add_intrinsics``.

    Register usage visible in ``blink``:
        0 -- flag waited on before each run
        1 -- sleep value handed to ``wait``
        2 -- number of LED increments
        3 -- done indicator (cleared at start, flagged with 1 at end)

    Returns:
        The ``Module`` instance that was populated.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    led = m.OutputReg('led', 8, initval=0)

    datawidth = 32
    # Not referenced anywhere else in this function.
    addrwidth = 10

    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth)

    # Registered via th.add_intrinsics below. It is called from blink with
    # a single argument, so the `fsm` parameter is presumably supplied by
    # the vthread framework -- confirm against the add_intrinsics docs.
    def wait(fsm, sleep):
        # 32-bit temporary register created on the FSM's module.
        cnt = fsm.m.TmpReg(32, initval=0)
        # Count up while below the requested sleep value ...
        fsm.If(cnt < sleep)(
            cnt.inc()
        )
        # ... and clear the counter once the sleep value is reached.
        fsm.If(cnt >= sleep)(
            cnt(0)
        )
        # Presumably advances to the next state under the preceding
        # condition (cnt >= sleep) -- confirm the semantics of Then().
        fsm.Then().goto_next()

    # Thread body. The `size` parameter receives the argument of
    # th.start(16) below but is overwritten from register 2 before use.
    def blink(size):
        while True:
            # wait start
            saxi.wait_flag(0, value=1, resetvalue=0)
            # reset done
            saxi.write(3, 0)

            sleep = saxi.read(1)
            size = saxi.read(2)

            for i in range(size):
                wait(sleep)
                led.value += 1

            # done
            saxi.write_flag(3, 1, resetvalue=0)

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    th.add_intrinsics(wait)
    # The value returned by start() is bound but not used in this function.
    fsm = th.start(16)

    return m
def mkLed():
    """Build a 'blinkled' module that self-tests DMA writes and reads.

    The module gets CLK/RST inputs, an ``AXIM`` named 'myaxi', a RAM named
    'myram', an ``AXISLiteRegister`` named 'saxi' and a temporary register
    ``all_ok`` holding the verification result. A vthread ``Thread`` called
    'th_blink' runs the nested ``blink`` function with argument 16.

    Register usage visible in ``blink``:
        0 -- flag waited on before the test
        1 -- done indicator (cleared at start, flagged with 1 at end)
        2 -- receives the value of ``all_ok``

    Returns:
        The ``Module`` instance that was populated.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth)

    # Shared pass/fail register: set True in blink, cleared by body on any
    # mismatch.
    all_ok = m.TmpReg(initval=0)

    # Thread body: runs `body` four times at different global offsets and
    # reports the combined result.
    def blink(size):
        # wait start
        saxi.wait_flag(0, value=1, resetvalue=0)
        # reset done
        saxi.write(1, 0)

        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            # Test for 4KB boundary check
            # Each iteration starts 4 below myaxi.boundary_size, shifted by
            # a further i * 16 * 1024.
            offset = i * 1024 * 16 + (myaxi.boundary_size - 4)
            body(size, offset)
            print('# iter %d end' % i)

        if all_ok:
            print('# verify (local): PASSED')
        else:
            print('# verify (local): FAILED')

        # result
        saxi.write(2, all_ok)

        # done
        saxi.write_flag(1, 1, resetvalue=0)

    # Writes two patterns (i + 100 and i + 1000) to two global regions via
    # DMA, then reads both regions back and clears all_ok on any mismatch.
    def body(size, offset):
        # write
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        for i in range(size):
            wdata = i + 1000
            myram.write(i, wdata)

        laddr = 0
        # Second region starts (size + size) * 4 past the first one.
        gaddr = (size + size) * 4 + offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        # Expect the first pattern (i + 100) in the first region.
        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 100):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

        # read
        laddr = 0
        gaddr = (size + size) * 4 + offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        # Expect the second pattern (i + 1000) in the second region.
        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 1000):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    # 16 is passed to start(); presumably it becomes blink's `size`
    # argument -- confirm against the Thread.start documentation.
    fsm = th.start(16)

    return m
Example #7
0
def mkLed():
    """Build a 'blinkled' module that adds two AXI streams through a Stream.

    The module gets CLK/RST inputs, two ``AXIStreamIn`` interfaces ('axi_a',
    'axi_b'), one ``AXIStreamOut`` ('axi_c'), an ``AXISLiteRegister`` named
    'saxi', three RAMs created with ``numports=2`` and a ``Stream`` named
    'mystream' whose sink 'c' is the sum of sources 'a' and 'b'. A vthread
    ``Thread`` called 'th_comp' runs the nested ``comp`` function.

    Register usage visible in ``comp``:
        0 -- flag waited on before each run
        1 -- busy indicator (written 1 at start, 0 at end)
        2 -- number of words to process

    Returns:
        The ``Module`` instance that was populated.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10

    axi_a = vthread.AXIStreamIn(m,
                                'axi_a',
                                clk,
                                rst,
                                datawidth,
                                with_last=True)
    axi_b = vthread.AXIStreamIn(m,
                                'axi_b',
                                clk,
                                rst,
                                datawidth,
                                with_last=True)
    axi_c = vthread.AXIStreamOut(m,
                                 'axi_c',
                                 clk,
                                 rst,
                                 datawidth,
                                 with_last=True)

    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth)

    # All three RAMs are created with two ports; the AXI stream transfers
    # in comp explicitly use port=1.
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth, numports=2)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth, numports=2)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth, numports=2)

    # Stream definition: c = a + b.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a + b
    strm.sink(c, 'c')

    # Binds the stream's sources and sink to the RAMs, runs it and waits
    # for it to complete.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    # Thread body: fill ram_a/ram_b from the input streams, run the stream,
    # then send ram_c to the output stream.
    def comp():
        while True:
            saxi.wait_flag(0, value=1, resetvalue=0)
            saxi.write(1, 1)  # set busy
            size = saxi.read(2)
            offset = 0

            axi_a.write_ram(ram_a, offset, size, port=1)  # blocking read
            axi_b.write_ram(ram_b, offset, size, port=1)  # blocking read
            comp_stream(size, offset)
            axi_c.read_ram(ram_c, offset, size, port=1)  # blocking write

            saxi.write(1, 0)  # unset busy

        # Follows a `while True` loop that contains no break, so ordinary
        # Python control flow never gets here.
        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # The value returned by start() is bound but not used in this function.
    fsm = th.start()

    return m
def mkMemcpy():
    """Build a 'blinkled' memcpy module with a Verilog submodule in the path.

    The module gets CLK/RST inputs, an 8-bit output register 'led', a
    two-port RAM ('ram_a'), an ``AXIM`` named 'maxi', an ``AXISLiteRegister``
    named 'saxi' with ``length=8`` and a ``Submodule`` instance 'inst_pe'
    built from ``pe_verilog_code`` (defined outside this function). The
    submodule's addr/wdata/wenable/rdata signals are connected to port 1 of
    ``ram_a``. A vthread ``Thread`` called 'th_memcpy' runs ``memcpy``.

    Register usage visible in ``memcpy``:
        0 -- flag waited on before each copy
        1 -- number of bytes to copy
        2 -- source offset
        3 -- destination offset
        4 -- flag written with 1 when the copy is finished

    Returns:
        The ``Module`` instance that was populated.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    # Declared as a module output; nothing in this function assigns to it.
    led = m.OutputReg('led', 8, initval=0)

    datawidth = 32
    addrwidth = 10
    # Upper bound on the number of words moved per chunk
    # (with the values above: 1024 // 4 == 256).
    ram_words = (2**addrwidth) // (datawidth // 8)

    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth, numports=2)
    maxi = vthread.AXIM(m, 'maxi', clk, rst, datawidth)
    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth, length=8)

    # import verilog submodule
    # Control signals wired to the submodule's start/busy/size ports.
    start = m.Reg('start', initval=0)
    busy = m.Wire('busy')
    size = m.Reg('size', addrwidth, initval=0)

    sub = Submodule(m,
                    pe_verilog_code,
                    'inst_pe',
                    prefix='pe_',
                    arg_params=(('ADDR_WIDTH', addrwidth), ('DATA_WIDTH',
                                                            datawidth)),
                    arg_ports=(('CLK', clk), ('RST', rst), ('start', start),
                               ('busy', busy), ('size', size)),
                    as_wire=('addr', 'rdata', 'wdata', 'wenable'))

    # connect ports to RAM
    ram_a.connect_rtl(1, sub['addr'], sub['wdata'], sub['wenable'],
                      sub['rdata'])

    # Hands `v` to the submodule as its size, sets `start` to 1 and then
    # back to 0, and spins until `busy` is no longer asserted.
    def control_processing_unit(v):
        size.value = v
        start.value = 1
        start.value = 0
        while busy:
            pass

    # Thread entry point: reads the job description from saxi and runs copy.
    def memcpy():
        while True:
            saxi.wait_flag(0, value=1, resetvalue=0)

            copy_bytes = saxi.read(1)
            src_offset = saxi.read(2)
            dst_offset = saxi.read(3)

            copy(copy_bytes, src_offset, dst_offset)

            saxi.write_flag(4, 1, resetvalue=0)

    # Moves copy_bytes (rounded down to whole words) from src_offset to
    # dst_offset in chunks of at most ram_words words. Between the DMA read
    # and the DMA write of each chunk the submodule is run over the chunk.
    def copy(copy_bytes, src_offset, dst_offset):
        rest_words = copy_bytes // (datawidth // 8)
        src_global_addr = src_offset
        dst_global_addr = dst_offset
        # Every chunk is staged at the same local address; this is never
        # modified below.
        local_addr = 0

        while rest_words > 0:
            # Clamp the chunk size to ram_words.
            if rest_words > ram_words:
                dma_size = ram_words
            else:
                dma_size = rest_words

            maxi.dma_read(ram_a, local_addr, src_global_addr, dma_size)
            control_processing_unit(dma_size)
            maxi.dma_write(ram_a, local_addr, dst_global_addr, dma_size)

            # Global addresses advance in bytes; the remaining count is in words.
            src_global_addr += dma_size * (datawidth // 8)
            dst_global_addr += dma_size * (datawidth // 8)
            rest_words -= dma_size

    th = vthread.Thread(m, 'th_memcpy', clk, rst, memcpy)
    # The value returned by start() is bound but not used in this function.
    fsm = th.start()

    return m
Example #9
0
def mkLed():
    """Build a 'blinkled' module comparing a LineBuffer stream to a reference.

    The module gets CLK/RST inputs, an ``AXIM`` named 'myaxi', an
    ``AXISLiteRegister`` named 'saxi' with ``length=5``, three RAMs
    ('ram_src', 'ram_dummy_src', 'ram_dst') and a ``Stream`` named 'mystream'
    built around a ``LineBuffer`` with two memories. A vthread ``Thread``
    called 'th_comp' runs the nested ``comp`` function, which produces the
    output twice -- once with the stream and once with the sequential
    reference ``comp_sequential`` -- and compares both with ``check``.

    Register usage visible in ``comp``:
        0 -- flag waited on before the run
        1 -- written 0 at the beginning and 1 at the end
        2, 3, 4 -- channel, width, height

    Returns:
        The ``Module`` instance that was populated.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)

    saxi_length = 5
    saxi = vthread.AXISLiteRegister(m,
                                    'saxi',
                                    clk,
                                    rst,
                                    datawidth=datawidth,
                                    length=saxi_length)

    ram_src = vthread.RAM(m, 'ram_src', clk, rst, datawidth, addrwidth)
    ram_dummy_src = vthread.RAM(m, 'ram_dummy_src', clk, rst, datawidth,
                                addrwidth)
    ram_dst = vthread.RAM(m, 'ram_dst', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    # This source's value is not used in any stream expression below.
    # NOTE(review): it presumably only sets the stream length -- confirm.
    dummy_src = strm.source('dummy_src')
    # Three chained counters: x is enabled when c == 3, and y is enabled
    # when c == 3 and x == 7.
    c = strm.Counter(initval=0, size=4)
    x = strm.Counter(initval=0, size=8, enable=(c == 3))
    y = strm.Counter(initval=0, size=8, enable=((c == 3) & (x == 7)))

    # Python binds `&` tighter than `==`, so `x & 1 == 0` below means
    # `(x & 1) == 0`.
    # shift:   x even and y even.
    # rotate1: not (x even and y even), and x odd.
    # rotate2: not (x even and y even), and x even.
    shift_cond = (x & 1 == 0) & ((y & 1) == 0)
    rotate_cond1 = (((((x & 1) == 0) & ((y & 1) == 0)) == 0) &
                    (((x & 1) == 0) == 0))
    rotate_cond2 = (((((x & 1) == 0) & ((y & 1) == 0)) == 0) & ((x & 1) == 0))
    # The source RAM is read (and its address counter enabled) under the
    # same condition as the shift.
    read_cond = shift_cond
    addrcounter = strm.Counter(initval=0, enable=read_cond)
    src = strm.read_RAM('ram_src',
                        addr=addrcounter,
                        when=read_cond,
                        datawidth=datawidth)
    # `counter`, `width` and `height` are declared on the stream but are not
    # used in any stream expression in this function.
    counter = strm.Counter(initval=0)
    width = strm.parameter('width')
    height = strm.parameter('height')

    linebuf = strm.LineBuffer(shape=(1, 1, 1),
                              memlens=[4, 13],
                              head_initvals=[0, 0],
                              tail_initvals=[3, 12],
                              data=src,
                              shift_cond=shift_cond,
                              rotate_conds=[rotate_cond1, rotate_cond2])
    dst = linebuf.get_window(0)

    strm.sink(dst, 'dst')

    # add a stall condition
    # `count` is a 4-bit register incremented by the Seq block `seq`.
    count = m.Reg('count', 4, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(count.inc())

    # NOTE(review): presumably disables strm.oready while count == 0 --
    # confirm against util.add_disable_cond.
    util.add_disable_cond(strm.oready, 1, count == 0)

    # Binds the stream to the RAMs and parameters, runs it and waits for it
    # to complete. The source/sink length is channel * width * height * 2 * 2.
    def comp_stream(channel, width, height, offset):
        strm.set_source('dummy_src', ram_dummy_src, offset,
                        channel * width * height * 2 * 2)
        strm.set_read_RAM('ram_src', ram_src)
        strm.set_sink('dst', ram_dst, offset, channel * width * height * 2 * 2)
        strm.set_parameter('width', width)
        strm.set_parameter('height', height)
        strm.run()
        strm.join()

    # Sequential reference: every output element (c, xx, yy) of the
    # (2*width) x (2*height) result is copied from input element
    # (c, xx // 2, yy // 2). The loop variable `c` is local to this function.
    def comp_sequential(channel, width, height, roffset, woffset):
        for yy in range(height * 2):
            for xx in range(width * 2):
                for c in range(channel):
                    # f(c, x, y) = in(c, x/2, y/2);
                    src_i = (xx // 2) * channel + (yy //
                                                   2) * width * channel + c
                    dst_i = xx * channel + yy * width * 2 * channel + c
                    val = ram_src.read(roffset + src_i)
                    ram_dst.write(woffset + dst_i, val)

    # Compares `size` words of ram_dst starting at offset_stream with those
    # starting at offset_seq and prints the verdict.
    def check(offset_stream, offset_seq, size):
        all_ok = True
        for i in range(size):
            st = ram_dst.read(offset_stream + i)
            sq = ram_dst.read(offset_seq + i)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread body (runs once; there is no enclosing loop).
    def comp():
        saxi.write(addr=1, value=0)
        saxi.wait_flag(0, value=1, resetvalue=0)
        channel = saxi.read(2)
        width = saxi.read(3)
        height = saxi.read(4)
        insize = channel * width * height
        outsize = channel * width * 2 * height * 2

        # Pass 1: stream implementation, result at ram_dst[0:outsize].
        roffset = 0
        woffset = 0
        myaxi.dma_read(ram_src, roffset, 0, insize)
        comp_stream(channel, width, height, roffset)
        myaxi.dma_write(ram_dst, woffset, 1024, outsize)

        # Pass 2: sequential reference. The same global data (address 0) is
        # loaded again, this time at local offset `insize`, and the result
        # is placed at ram_dst[outsize:2*outsize].
        roffset = insize
        woffset = outsize
        myaxi.dma_read(ram_src, roffset, 0, insize)
        comp_sequential(channel, width, height, roffset, woffset)
        myaxi.dma_write(ram_dst, woffset, 2 * 1024, outsize)

        check(0, woffset, outsize)
        saxi.write(addr=1, value=1)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # The value returned by start() is bound but not used in this function.
    fsm = th.start()

    return m
def mkLed():
    """Build a 'blinkled' matrix-multiply module using RAM-side DMA calls.

    The module gets CLK/RST inputs, an 8-bit output register 'led', three
    RAMs ('ram_a', 'ram_b', 'ram_c'), an ``AXIM`` named 'maxi' and an
    ``AXISLiteRegister`` named 'saxi' with ``length=8``. DMA transfers are
    issued through ``ram.dma_read(maxi, ...)`` / ``ram.dma_write(maxi, ...)``.
    A vthread ``Thread`` called 'th_matmul' runs the nested ``matmul``
    function. The nested ``check`` function is defined, but its only call
    site is commented out.

    Register usage visible in ``matmul``:
        0 -- flag waited on before each run
        1 -- matrix_size
        2, 3, 4 -- global offsets for A, B and C
        5 -- flag written with 1 when the run is finished

    Returns:
        The ``Module`` instance that was populated.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    # Only assigned inside `check`, which is not called (see matmul).
    led = m.OutputReg('led', 8, initval=0)

    datawidth = 32
    addrwidth = 10

    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    maxi = vthread.AXIM(m, 'maxi', clk, rst, datawidth)
    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth, length=8)

    # Thread entry point: reads the job description from saxi and runs comp.
    def matmul():
        while True:
            saxi.wait_flag(0, value=1, resetvalue=0)
            matrix_size = saxi.read(1)
            a_offset = saxi.read(2)
            b_offset = saxi.read(3)
            c_offset = saxi.read(4)
            comp(matrix_size, a_offset, b_offset, c_offset)
            #check(matrix_size, a_offset, b_offset, c_offset)
            saxi.write_flag(5, 1, resetvalue=0)

    # For every i: load matrix_size words of A, then for every j load
    # matrix_size words of B, accumulate the element-wise products into
    # ram_c[j], and finally write matrix_size words of ram_c back out.
    # NOTE(review): B is consumed in contiguous matrix_size-word chunks, so
    # this yields A x B only if B is stored transposed -- confirm with the
    # testbench that fills memory.
    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            ram_a.dma_read(maxi, 0, a_addr, matrix_size)

            # B is re-read from its beginning for every chunk of A.
            b_addr = b_offset
            for j in range(matrix_size):
                ram_b.dma_read(maxi, 0, b_addr, matrix_size)

                sum = 0
                for k in range(matrix_size):
                    x = ram_a.read(k)
                    y = ram_b.read(k)
                    sum += x * y
                ram_c.write(j, sum)

                # Global addresses advance by matrix_size words, expressed
                # in bytes (datawidth // 8 bytes per word).
                b_addr += matrix_size * (datawidth // 8)

            ram_c.dma_write(maxi, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    # Reads C back and expects (i + 1) * 2 on the diagonal and 0 elsewhere;
    # reports through `led` and print. a_offset and b_offset are accepted
    # but not used.
    def check(matrix_size, a_offset, b_offset, c_offset):
        all_ok = True
        c_addr = c_offset
        for i in range(matrix_size):
            ram_c.dma_read(maxi, 0, c_addr, matrix_size)
            for j in range(matrix_size):
                v = ram_c.read(j)
                if i == j and v != (i + 1) * 2:
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
                if i != j and v != 0:
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
            c_addr += matrix_size * (datawidth // 8)

        # Distinct LED patterns for success and failure.
        if all_ok:
            led.value = 0b01010101
            print("OK")
        else:
            led.value = 0x0f
            print("NG")

    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    # The value returned by start() is bound but not used in this function.
    fsm = th.start()

    return m
Example #11
0
def mkLed():
    """Build a 'blinkled' module comparing a LineBuffer stream to a reference.

    The module gets CLK/RST inputs, an ``AXIM`` named 'myaxi', an
    ``AXISLiteRegister`` named 'saxi' with ``length=4``, three RAMs
    ('ram_src', 'ram_dummy_src', 'ram_dst') and a ``Stream`` named 'mystream'
    built around a ``LineBuffer`` with one memory. A vthread ``Thread``
    called 'th_comp' runs the nested ``comp`` function, which produces the
    output twice -- once with the stream and once with the sequential
    reference ``comp_sequential`` -- and compares both with ``check``.

    Register usage visible in ``comp``:
        0 -- flag waited on before the run
        1 -- written 0 at the beginning and 1 at the end
        2, 3 -- width, height

    Returns:
        The ``Module`` instance that was populated.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)

    saxi_length = 4
    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth=datawidth, length=saxi_length)

    ram_src = vthread.RAM(m, 'ram_src', clk, rst, datawidth, addrwidth)
    ram_dummy_src = vthread.RAM(m, 'ram_dummy_src', clk, rst, datawidth, addrwidth)
    ram_dst = vthread.RAM(m, 'ram_dst', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    # This source's value is not used in any stream expression below.
    # NOTE(review): it presumably only sets the stream length -- confirm.
    dummy_src = strm.source('dummy_src')
    # Two chained counters: y is enabled when x == 7.
    x = strm.Counter(initval=0, size=8)
    y = strm.Counter(initval=0, size=8, enable=(x == 7))

    # Python binds `&` tighter than `==`, so `x & 1 == 0` means
    # `(x & 1) == 0`.
    # shift:  x even and y even.
    # rotate: not shift, and x even.
    shift_cond = ((x & 1 == 0) & (y & 1 == 0))
    rotate_cond = ((shift_cond == 0) & (x & 1 == 0))
    # The source RAM is read (and its address counter enabled) under the
    # same condition as the shift.
    read_cond = shift_cond
    addrcounter = strm.Counter(initval=0, enable=read_cond)
    src = strm.read_RAM('ram_src', addr=addrcounter, when=read_cond, datawidth=datawidth)
    # `counter`, `width` and `height` are declared on the stream but are not
    # used in any stream expression in this function.
    counter = strm.Counter(initval=0)
    width = strm.parameter('width')
    height = strm.parameter('height')

    linebuf = strm.LineBuffer(shape=(1, 1), memlens=[4],
                              head_initvals=[0], tail_initvals=[3],
                              data=src, shift_cond=shift_cond, rotate_conds=[rotate_cond])
    dst = linebuf.get_window(0)

    strm.sink(dst, 'dst')

    # Binds the stream to the RAMs and parameters, runs it and waits for it
    # to complete. The source/sink length is width * height * 2 * 2.
    def comp_stream(width, height, offset):
        strm.set_source('dummy_src', ram_dummy_src, offset, width * height * 2 * 2)
        strm.set_read_RAM('ram_src', ram_src)
        strm.set_sink('dst', ram_dst, offset, width * height * 2 * 2)
        strm.set_parameter('width', width)
        strm.set_parameter('height', height)
        strm.run()
        strm.join()

    # Sequential reference: every output element (x, y) of the
    # (2*width) x (2*height) result is copied from input element
    # (x // 2, y // 2). The loop variables x and y are local to this
    # function and unrelated to the stream counters of the same name.
    def comp_sequential(width, height, roffset, woffset):
        for y in range(height * 2):
            for x in range(width * 2):
                src_i = x // 2 + (y // 2) * width
                dst_i = x + y * width * 2
                val = ram_src.read(roffset + src_i)
                ram_dst.write(woffset + dst_i, val)

    # Compares `size` words of ram_dst starting at offset_stream with those
    # starting at offset_seq and prints the verdict.
    def check(offset_stream, offset_seq, size):
        all_ok = True
        for i in range(size):
            st = ram_dst.read(offset_stream + i)
            sq = ram_dst.read(offset_seq + i)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread body (runs once; there is no enclosing loop).
    def comp():
        saxi.write(addr=1, value=0)
        saxi.wait_flag(0, value=1, resetvalue=0)
        width = saxi.read(2)
        height = saxi.read(3)
        in_size = width * height
        out_size = width * height * 2 * 2

        # Pass 1: stream implementation, result at ram_dst[0:out_size].
        roffset = 0
        woffset = 0

        myaxi.dma_read(ram_src, roffset, 0, in_size)
        comp_stream(width, height, roffset)
        myaxi.dma_write(ram_dst, woffset, 1024, out_size)

        # Pass 2: sequential reference. The same global data (address 0) is
        # loaded again, this time at local offset `in_size`, and the result
        # is placed at ram_dst[out_size:2*out_size].
        roffset = in_size
        woffset = out_size

        myaxi.dma_read(ram_src, roffset, 0, in_size)
        comp_sequential(width, height, roffset, woffset)
        myaxi.dma_write(ram_dst, woffset, 2 * 1024, out_size)

        check(0, woffset, out_size)
        saxi.write(addr=1, value=1)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # The value returned by start() is bound but not used in this function.
    fsm = th.start()

    return m
Example #12
0
def mkLed():
    """Build a 'blinkled' module chaining two Streams through FIFOs.

    The module gets CLK/RST inputs, an ``AXIM`` named 'maxi' (with
    ``disable_write()`` called on it), an ``AXISLiteRegister`` named 'saxi'
    with ``length=8``, an ``AXIStreamInFifo`` ('axi_in'), an
    ``AXIStreamOutFifo`` ('axi_out'), three FIFOs, one RAM ('ram_b') and two
    ``Stream`` objects:

    * 'mystream_reduce': squares source 'a' and feeds it to
      ``ReduceAddValid`` with parameter 'reduce_size'; the sink 'sum' is
      gated by the returned valid signal.
    * 'mystream_bias': sink 'z' is the sum of sources 'x' and 'y'.

    A vthread ``Thread`` called 'th_comp' runs the nested ``comp`` function.

    Register usage visible in ``comp``:
        0 -- flag waited on before each run
        1 -- busy indicator (written 1 at start, 0 at end)
        2, 3, 4 -- read_size, write_size, reduce_size
        5 -- global address of the bias data

    Returns:
        The ``Module`` instance that was populated.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10

    maxi = vthread.AXIM(m, 'maxi', clk, rst, datawidth)
    # Only dma_read is issued on maxi in this function.
    maxi.disable_write()

    saxi = vthread.AXISLiteRegister(m,
                                    'saxi',
                                    clk,
                                    rst,
                                    datawidth=32,
                                    length=8)

    axi_in = vthread.AXIStreamInFifo(m,
                                     'axi_in',
                                     clk,
                                     rst,
                                     datawidth,
                                     with_last=True)
    axi_out = vthread.AXIStreamOutFifo(m,
                                       'axi_out',
                                       clk,
                                       rst,
                                       datawidth,
                                       with_last=True)

    # Data path wired up in comp:
    #   axi_in -> fifo_a -> strm0 -> fifo_b -> strm1 -> fifo_c -> axi_out
    fifo_addrwidth = 8
    fifo_a = vthread.FIFO(m, 'fifo_a', clk, rst, datawidth, fifo_addrwidth)
    fifo_b = vthread.FIFO(m, 'fifo_b', clk, rst, datawidth, fifo_addrwidth)
    fifo_c = vthread.FIFO(m, 'fifo_c', clk, rst, datawidth, fifo_addrwidth)

    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)

    # Stream 0: reduce-add of a * a, with output gated by sum_valid.
    strm0 = vthread.Stream(m, 'mystream_reduce', clk, rst)
    a = strm0.source('a')
    reduce_size = strm0.parameter('reduce_size')
    v = a * a
    sum, sum_valid = strm0.ReduceAddValid(v, reduce_size)
    strm0.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    # Stream 1: z = x + y.
    strm1 = vthread.Stream(m, 'mystream_bias', clk, rst)
    x = strm1.source('x')
    y = strm1.source('y')
    z = x + y
    strm1.sink(z, 'z')

    # Thread body.
    def comp():

        while True:
            saxi.wait_flag(0, value=1, resetvalue=0)

            saxi.write(1, 1)  # set busy

            read_size = saxi.read(2)
            write_size = saxi.read(3)
            reduce_size = saxi.read(4)
            bias_addr = saxi.read(5)

            # Force each size to 1 when the register value is <= 0.
            if read_size <= 0:
                read_size = 1
            if write_size <= 0:
                write_size = 1
            if reduce_size <= 0:
                reduce_size = 1

            # Load write_size words from bias_addr into ram_b (source 'y').
            maxi.dma_read(ram_b, 0, bias_addr, write_size)

            axi_in.write_fifo(fifo_a, read_size)
            axi_out.read_fifo(fifo_c, write_size)

            strm0.set_source_fifo('a', fifo_a, read_size)
            strm0.set_parameter('reduce_size', reduce_size)
            strm0.set_sink_fifo('sum', fifo_b, write_size)

            strm1.set_source_fifo('x', fifo_b, write_size)
            strm1.set_source('y', ram_b, 0, write_size)
            strm1.set_sink_fifo('z', fifo_c, write_size)

            # Both streams are started before either is joined.
            strm0.run()
            strm1.run()

            strm0.join()
            strm1.join()

            saxi.write(1, 0)  # unset busy

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # The value returned by start() is bound but not used in this function.
    fsm = th.start()

    return m
Example #13
0
def mkLed(axi_datawidth=32, datawidth=4, addrwidth=10):
    """Build a 'blinkled' module that tests DMA with a narrow MultibankRAM.

    The module gets CLK/RST inputs, an ``AXIM`` named 'myaxi' of width
    ``axi_datawidth``, a ``MultibankRAM`` named 'myram' of width
    ``datawidth`` with ``ceil(axi_datawidth / datawidth)`` banks, an
    ``AXISLiteRegister`` named 'saxi' (width 32) and a temporary register
    ``all_ok`` holding the verification result. A vthread ``Thread`` called
    'th_blink' runs the nested ``blink`` function with argument 32.

    Register usage visible in ``blink``:
        0 -- flag waited on before the test
        1 -- done indicator (cleared at start, flagged with 1 at end)
        2 -- receives the value of ``all_ok``

    Args:
        axi_datawidth: Data width handed to ``vthread.AXIM``.
        datawidth: Data width of each RAM bank; must be below 8.
        addrwidth: Address width handed to ``vthread.MultibankRAM``.

    Returns:
        The ``Module`` instance that was populated.

    Raises:
        ValueError: If ``datawidth`` is 8 or larger.
    """
    if datawidth >= 8:
        raise ValueError('not supported.')

    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # With the default arguments this is ceil(32 / 4) == 8.
    numbanks = int(math.ceil(axi_datawidth / datawidth))
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, axi_datawidth)
    myram = vthread.MultibankRAM(m,
                                 'myram',
                                 clk,
                                 rst,
                                 datawidth,
                                 addrwidth,
                                 numbanks=numbanks)

    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, 32)

    # Shared pass/fail register: set True in blink, cleared by body on any
    # mismatch.
    all_ok = m.TmpReg(initval=0)

    # Thread body (runs once; there is no enclosing loop).
    def blink(size):
        # wait start
        saxi.wait_flag(0, value=1, resetvalue=0)
        # reset done
        saxi.write(1, 0)

        all_ok.value = True
        # Test for 4KB boundary check
        # Start 4 below myaxi.boundary_size, shifted by 16 * 1024.
        offset = 1024 * 16 + (myaxi.boundary_size - 4)
        body(size, offset)

        if all_ok:
            print('# verify (local): PASSED')
        else:
            print('# verify (local): FAILED')

        # result
        saxi.write(2, all_ok)

        # done
        saxi.write_flag(1, 1, resetvalue=0)

    # Reads `size` words at `offset`, checks them against an expected
    # pattern while overwriting them with a second pattern, writes the
    # buffer back, then reads it again and checks the second pattern.
    def body(size, offset):
        # read and modify
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            # Mask the value read to `datawidth` bits before comparing.
            rdata = myram.read(i) & (2**datawidth - 1)
            # Expected initial content.
            # NOTE(review): presumably mirrors how the testbench
            # initialises memory -- confirm.
            verify = (offset * 8 // datawidth + i) % (2**datawidth - 1) + 1
            wdata = (verify + 1000) % (2**datawidth - 1)
            myram.write(i, wdata)
            if vthread.verilog.NotEql(rdata, verify):
                print('rdata[%d] = %d (!= %d)' % (i, rdata, verify))
                all_ok.value = False

        # write
        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read (verify)
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i) & (2**datawidth - 1)
            # Same formula as `wdata` in the first loop, written out in full.
            verify = (((offset * 8 // datawidth + i) %
                       (2**datawidth - 1) + 1 + 1000) % (2**datawidth - 1))
            if vthread.verilog.NotEql(rdata, verify):
                print('rdata[%d] = %d (!= %d)' % (i, rdata, verify))
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    # 32 is passed to start(); presumably it becomes blink's `size`
    # argument -- confirm against the Thread.start documentation.
    fsm = th.start(32)

    return m
Example #14
0
def mkMemcpy():
    """Build a 'blinkled' memcpy module with an embedded Verilog checksum.

    The module gets CLK/RST inputs, an 8-bit output register 'led', one RAM
    ('ram_a'), an ``AXIM`` named 'maxi', an ``AXISLiteRegister`` named 'saxi'
    with ``length=8`` and a block of raw Verilog added with
    ``m.EmbeddedCode``. A vthread ``Thread`` called 'th_memcpy' runs the
    nested ``memcpy`` function, which delegates the transfer to ``copy``.

    Register usage visible in ``memcpy``:
        0 -- flag waited on before each copy
        1 -- number of bytes to copy
        2 -- source offset
        3 -- destination offset
        4 -- flag written with 1 when the copy is finished

    Returns:
        The ``Module`` instance that was populated.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    # Driven by the embedded Verilog below, not by the thread code.
    led = m.OutputReg('led', 8, initval=0)

    datawidth = 32
    addrwidth = 10

    # Upper bound on the number of words moved per DMA read/write pair
    # (with the values above: 1024 // 4 == 256).
    ram_words = (2 ** addrwidth) // (datawidth // 8)

    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    maxi = vthread.AXIM(m, 'maxi', clk, rst, datawidth)
    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth, length=8)

    # Raw Verilog: a 32-bit register `sum` accumulates the wdata signal of
    # ram_a.interfaces[0] whenever its wenable signal is high, and `led` is
    # assigned from `sum` every clock. The {interface.*} placeholders are
    # filled in by str.format with ram_a.interfaces[0].
    code = m.EmbeddedCode("""
reg [31:0] sum;
always @(posedge CLK) begin
  if(RST) begin
    sum <= 0;
    led <= 0;
  end else begin
    if({interface.wenable}) begin
      sum <= sum + {interface.wdata};
    end
    led <= sum;
  end 
end
""".format(interface=ram_a.interfaces[0]))

    # Thread entry point: reads the job description from saxi and runs copy.
    def memcpy():
        while True:
            saxi.wait_flag(0, value=1, resetvalue=0)

            copy_bytes = saxi.read(1)
            src_offset = saxi.read(2)
            dst_offset = saxi.read(3)

            copy(copy_bytes, src_offset, dst_offset)

            saxi.write_flag(4, 1, resetvalue=0)

    # Moves copy_bytes (rounded down to whole words) from src_offset to
    # dst_offset in chunks of at most ram_words words, staging each chunk
    # in ram_a.
    def copy(copy_bytes, src_offset, dst_offset):
        rest_words = copy_bytes // (datawidth // 8)
        src_global_addr = src_offset
        dst_global_addr = dst_offset
        # Every chunk is staged at the same local address; this is never
        # modified below.
        local_addr = 0

        while rest_words > 0:
            # Clamp the chunk size to ram_words.
            if rest_words > ram_words:
                dma_size = ram_words
            else:
                dma_size = rest_words

            maxi.dma_read(ram_a, local_addr, src_global_addr, dma_size)
            maxi.dma_write(ram_a, local_addr, dst_global_addr, dma_size)

            # Global addresses advance in bytes; the remaining count is in words.
            src_global_addr += dma_size * (datawidth // 8)
            dst_global_addr += dma_size * (datawidth // 8)
            rest_words -= dma_size

    th = vthread.Thread(m, 'th_memcpy', clk, rst, memcpy)
    # The value returned by start() is bound but not used in this function.
    fsm = th.start()

    return m
Example #15
0
def mkLed():
    """Build a module that checks a stream LineBuffer against a sequential model.

    The module (named 'blinkled') instantiates an AXI master (``myaxi``),
    an AXI-Lite register block (``saxi``), source/destination RAMs and a
    ``vthread.Stream`` containing a 3x3 ``LineBuffer`` whose shift/rotate
    conditions depend on a running counter.  The control thread runs the
    same computation twice -- once through the stream and once through a
    hand-written sequential model (``comp_sequential``) -- and prints
    whether the two results in ``ram_dst`` match.

    Returns:
        The constructed ``Module``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)

    saxi_length = 4
    saxi = vthread.AXISLiteRegister(m,
                                    'saxi',
                                    clk,
                                    rst,
                                    datawidth=datawidth,
                                    length=saxi_length)

    ram_src = vthread.RAM(m, 'ram_src', clk, rst, datawidth, addrwidth)
    ram_dst = vthread.RAM(m, 'ram_dst', clk, rst, datawidth, addrwidth)

    # Stream definition.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    src = strm.source('src')
    counter = strm.Counter(initval=0)
    width = strm.parameter('width')
    height = strm.parameter('height')

    # Schedule by counter value:
    #   counter < 20        -> shift
    #   20 <= counter < 30  -> rotate
    #   counter >= 30       -> shift on even counts, rotate on odd counts
    # Note: in Python `&` binds tighter than `==`, so `counter & 1 == 0`
    # is `(counter & 1) == 0`.
    shift_cond = strm.Or((counter < 20),
                         ((counter >= 30) & (counter & 1 == 0)))
    rotate_cond = strm.Or(((counter >= 20) & (counter < 30)),
                          ((counter >= 30) & (counter & 1 == 1)))

    linebuf = strm.LineBuffer(shape=(3, 3),
                              memlens=[4],
                              data=src,
                              head_initvals=[0],
                              tail_initvals=[3],
                              shift_cond=shift_cond,
                              rotate_conds=[rotate_cond])

    # Collect the nine window taps in row-major index order.
    window = [None] * 9
    for y in range(3):
        for x in range(3):
            window[y * 3 + x] = linebuf.get_window(y * 3 + x)

    # The window registers contain invalid values in the beginning
    # because the initial value of the shift memory is undefined.
    # Do not output the sum until all the window registers hold valid values.
    dst = strm.Mux(counter < 20, window[8], strm.AddN(*window))
    strm.sink(dst, 'dst')

    # Add a stall condition: a free-running 4-bit counter is used to
    # disable the stream's `oready` whenever it reads 0.
    count = m.Reg('count', 4, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(count.inc())

    util.add_disable_cond(strm.oready, 1, count == 0)

    # Line memories used only by the sequential reference model.
    ram_bufs = [
        vthread.RAM(m, 'ram_buf' + str(i), clk, rst, datawidth, addrwidth)
        for i in range(3)
    ]

    # Run the stream over `width * height` words starting at `offset` in
    # ram_src, writing the results to the same offset in ram_dst.
    def comp_stream(width, height, offset):
        strm.set_source('src', ram_src, offset, width * height)
        strm.set_sink('dst', ram_dst, offset, width * height)
        strm.set_parameter('width', width)
        strm.set_parameter('height', height)
        strm.run()
        strm.join()

    # Sequential model of the line buffer: nine scalar window registers plus
    # three RAMs as line memories, following the same shift/rotate schedule
    # as the stream definition above.
    def comp_sequential(width, height, offset):
        # Initial pointers mirror head_initvals / tail_initvals above.
        head = 0
        tail = 3
        window_0 = window_1 = window_2 = 0
        window_3 = window_4 = window_5 = 0
        window_6 = window_7 = window_8 = 0

        for i in range(width * height):
            src = ram_src.read(offset + i)
            shift = ((i < 20) or ((i >= 30) and (i & 1 == 0)))
            rotate = (((i >= 20) and (i < 30)) or ((i >= 30) and (i & 1 == 1)))
            if shift:
                # Shift: each row moves along by one; rows 1 and 2 take their
                # new value from the line memory of the row before them, and
                # row 0 takes the fresh input word.
                ram_bufs[2].write(tail, window_8)
                window_8 = window_7
                window_7 = window_6
                window_6 = ram_bufs[1].read(head)
                ram_bufs[1].write(tail, window_5)
                window_5 = window_4
                window_4 = window_3
                window_3 = ram_bufs[0].read(head)
                ram_bufs[0].write(tail, window_2)
                window_2 = window_1
                window_1 = window_0
                window_0 = src
                # Pointers wrap after index 3 (four entries, cf. memlens).
                head = head + 1 if head < 3 else 0
                tail = tail + 1 if tail < 3 else 0
            elif rotate:
                # Rotate: each row is refilled from its own line memory and
                # the input word is not consumed into the window.
                ram_bufs[2].write(tail, window_8)
                window_8 = window_7
                window_7 = window_6
                window_6 = ram_bufs[2].read(head)
                ram_bufs[1].write(tail, window_5)
                window_5 = window_4
                window_4 = window_3
                window_3 = ram_bufs[1].read(head)
                ram_bufs[0].write(tail, window_2)
                window_2 = window_1
                window_1 = window_0
                window_0 = ram_bufs[0].read(head)
                head = head + 1 if head < 3 else 0
                tail = tail + 1 if tail < 3 else 0
            sum = window_0 + window_1 + window_2 + window_3 + \
                window_4 + window_5 + window_6 + window_7 + window_8
            # During the first 20 iterations a single window register is
            # written instead of the sum.
            # NOTE(review): the stream side selects window[8] for
            # counter < 20 while this writes window_0 -- presumably the tap
            # indices are mirrored; confirm against LineBuffer.get_window.
            if i < 20:
                ram_dst.write(offset + i, window_0)
            else:
                ram_dst.write(offset + i, sum)

    # Compare `size` words of ram_dst at the two offsets and print the
    # verdict.
    def check(offset_stream, offset_seq, size):
        all_ok = True
        for i in range(size):
            st = ram_dst.read(offset_stream + i)
            sq = ram_dst.read(offset_seq + i)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point.
    def comp():
        # Register 1 is written 0 here and 1 once everything has finished.
        saxi.write(addr=1, value=0)
        saxi.wait_flag(0, value=1, resetvalue=0)
        width = saxi.read(2)
        height = saxi.read(3)
        size = width * height

        # Stream pass: input from global address 0, result to 1024.
        offset = 0
        myaxi.dma_read(ram_src, offset, 0, size)
        comp_stream(width, height, offset)
        myaxi.dma_write(ram_dst, offset, 1024, size)

        # Sequential pass: same input, placed `size` words further into the
        # local RAMs; result to global address 2 * 1024.
        offset = size
        myaxi.dma_read(ram_src, offset, 0, size)
        comp_sequential(width, height, offset)
        myaxi.dma_write(ram_dst, offset, 2 * 1024, size)

        check(0, offset, size)
        saxi.write(addr=1, value=1)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start()

    return m
Example #16
0
def mkLed():
    """Build a matrix-multiply module using a multiply-accumulate stream.

    The module (named 'blinkled') has CLK/RST inputs, three RAMs
    (``ram_a``, ``ram_b``, ``ram_c``), an AXI master (``maxi``) and an
    AXI-Lite register block (``saxi``).  A ``vthread`` thread waits on
    control register 0, reads the matrix size and the A/B/C offsets from
    registers 1-4, computes the product one output word at a time with the
    ``strm_madd`` stream, and finally calls ``write_flag`` on register 5.

    Returns:
        The constructed ``Module``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # Word width in bits.  Defined locally, like `addrwidth`, so that the
    # builder does not depend on a name from an outer scope; 32 matches the
    # sibling examples in this file.
    datawidth = 32
    addrwidth = 10
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    maxi = vthread.AXIM(m, 'maxi', clk, rst, datawidth)
    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth, length=8)

    # Stream definition: reduce-add of a * b over `size` elements; the sink
    # 'sum' is written only when `sum_valid` is asserted.
    strm = vthread.Stream(m, 'strm_madd', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    size = strm.parameter('size')
    sum, sum_valid = strm.ReduceAddValid(a * b, size)
    strm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    # Run one multiply-accumulate over the first `size` words of ram_a and
    # ram_b, storing the single result word at ram_c[waddr].
    def strm_madd(size, waddr):
        strm.set_source('a', ram_a, 0, size)
        strm.set_source('b', ram_b, 0, size)
        strm.set_parameter('size', size)
        strm.set_sink('sum', ram_c, waddr, 1)
        strm.run()
        strm.join()

    # Thread entry point: repeatedly service multiply requests.
    def matmul():
        while True:
            # Wait on register 0 (value=1, resetvalue=0).
            saxi.wait_flag(0, value=1, resetvalue=0)
            matrix_size = saxi.read(1)
            a_offset = saxi.read(2)
            b_offset = saxi.read(3)
            c_offset = saxi.read(4)
            comp(matrix_size, a_offset, b_offset, c_offset)
            # Signal completion through register 5.
            saxi.write_flag(5, 1, resetvalue=0)

    # For each row i of A, combine it with each row j read from the B
    # region and write the resulting row of C back to memory.
    # NOTE(review): B is read row by row, so it is presumably stored
    # transposed -- confirm against the host-side test bench.
    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            maxi.dma_read(ram_a, 0, a_addr, matrix_size)

            b_addr = b_offset
            for j in range(matrix_size):
                maxi.dma_read(ram_b, 0, b_addr, matrix_size)

                # Result for output element j of the current row.
                strm_madd(matrix_size, j)

                # Global addresses advance in bytes.
                b_addr += matrix_size * (datawidth // 8)

            maxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    fsm = th.start()

    return m