Example #1
0
def mul_rshift(m, clk, rst,
               x_datawidth, x_point, x_signed,
               y_datawidth, y_point, y_signed,
               mul_width=None, mul_point=None, mul_signed=None):
    """Build a stream that sinks ``Sra(x * y, rshift)`` as 'z'.

    The stream has three sources: 'x', 'y' and an unsigned 'rshift'.
    ``mul_width`` and ``mul_signed`` override the product's width and
    signedness when they are not None.  ``mul_point`` inserts a ``Cast``
    when it is given and differs from ``max(x_point, y_point)``.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('mul')
    max_width = max(x_datawidth, y_datawidth)
    max_point = max(x_point, y_point)

    stream = vthread.Stream(m, name, clk, rst, max_width)
    lhs = stream.source('x', x_datawidth, x_point, x_signed)
    rhs = stream.source('y', y_datawidth, y_point, y_signed)

    # The shift amount is one bit wider than ceil(log2(max_width)).
    shamt = stream.source('rshift', signed=False)
    shamt.width = int(math.ceil(math.log(max_width, 2))) + 1

    prod = lhs * rhs
    prod.latency = 4
    if mul_width is not None:
        prod.width = mul_width
    if mul_signed is not None:
        prod.signed = mul_signed

    needs_cast = mul_point is not None and max_point != mul_point
    if needs_cast:
        prod = stream.Cast(prod, point=mul_point)

    shifted = stream.Sra(prod, shamt)
    stream.sink(shifted, 'z')
    return stream
Example #2
0
def madd_rshift(m, clk, rst,
                x_datawidth, x_point, x_signed,
                y_datawidth, y_point, y_signed,
                z_datawidth, z_point, z_signed,
                sum_width=None, sum_point=None, sum_signed=None):
    """Build a stream that sinks ``Sra(Madd(x, y, z), rshift)`` as 'sum'.

    The stream has four sources: 'x', 'y', 'z' and an unsigned 'rshift'.
    ``sum_width`` and ``sum_signed`` override the Madd result's width and
    signedness when they are not None.  ``sum_point`` inserts a ``Cast``
    when it is given and differs from ``max(x_point, y_point, z_point)``.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('madd')
    datawidth = max(x_datawidth, y_datawidth, z_datawidth)
    point = max(x_point, y_point, z_point)

    stream = vthread.Stream(m, name, clk, rst, datawidth)
    x = stream.source('x', x_datawidth, x_point, x_signed)
    y = stream.source('y', y_datawidth, y_point, y_signed)
    z = stream.source('z', z_datawidth, z_point, z_signed)
    rshift = stream.source('rshift', signed=False)
    # The shift amount is one bit wider than ceil(log2(datawidth)).
    rshift.width = int(math.ceil(math.log(datawidth, 2))) + 1

    # The local is named `total` so the builtin `sum` is not shadowed.
    total = stream.Madd(x, y, z)
    total.latency = 4
    # The overrides come from the sum_* parameters.  The previous code read
    # undefined mul_* names here and raised NameError on every call.
    if sum_width is not None:
        total.width = sum_width
    if sum_signed is not None:
        total.signed = sum_signed
    if sum_point is not None and point != sum_point:
        total = stream.Cast(total, point=sum_point)
    total = stream.Sra(total, rshift)

    stream.sink(total, 'sum')
    return stream
Example #3
0
def mul(m,
        clk,
        rst,
        x_datawidth,
        x_point,
        x_signed,
        y_datawidth,
        y_point,
        y_signed,
        mul_width=None,
        mul_point=None,
        mul_signed=None):
    """Build a stream that sinks the product of sources 'x' and 'y' as 'z'.

    ``mul_width`` and ``mul_signed`` override the product's width and
    signedness when they are not None.  ``mul_point`` inserts a ``Cast``
    when it is given and differs from ``max(x_point, y_point)``.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('mul')
    max_width = max(x_datawidth, y_datawidth)
    max_point = max(x_point, y_point)

    stream = vthread.Stream(m, name, clk, rst, max_width)
    lhs = stream.source('x', x_datawidth, x_point, x_signed)
    rhs = stream.source('y', y_datawidth, y_point, y_signed)

    prod = lhs * rhs
    prod.latency = 4
    if mul_width is not None:
        prod.width = mul_width
    if mul_signed is not None:
        prod.signed = mul_signed

    needs_cast = mul_point is not None and max_point != mul_point
    if needs_cast:
        prod = stream.Cast(prod, point=mul_point)

    stream.sink(prod, 'z')
    return stream
Example #4
0
def acc_rshift_round_frac(m, clk, rst,
                          datawidth, point, signed,
                          sum_width=None, sum_point=None, sum_signed=None):
    """Build an accumulator stream that adds a rounding term before shifting.

    Source 'x' is reduced with ``ReduceAddValid`` over the 'size' constant.
    The term ``Mux(rshift > 0, Sll(1, rshift - 1), 0)`` is added to the
    reduction result, which is then passed through ``Sra`` with 'rshift'.
    The sinks are 'sum' (the shifted value) and 'valid' (the reduction's
    valid signal).

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('acc')

    stream = vthread.Stream(m, name, clk, rst, datawidth)
    data_in = stream.source('x', datawidth, point, signed)

    # The shift amount is one bit wider than ceil(log2(datawidth)).
    shamt = stream.source('rshift', signed=False)
    shamt.width = int(math.ceil(math.log(datawidth, 2))) + 1
    length = stream.constant('size', signed=False)

    # Rounding term: 1 << (rshift - 1) when rshift is non-zero, else 0.
    shift_nonzero = shamt > 0
    half_unit = stream.Sll(1, shamt - 1)
    rounding = stream.Mux(shift_nonzero, half_unit, 0)
    rounding.width = sum_width

    total, done = stream.ReduceAddValid(
        data_in, length, width=sum_width, signed=sum_signed)
    needs_cast = sum_point is not None and point != sum_point
    if needs_cast:
        total = stream.Cast(total, point=sum_point)

    total = total + rounding
    total = stream.Sra(total, shamt)

    stream.sink(total, 'sum')
    stream.sink(done, 'valid')
    return stream
Example #5
0
def div_const(m,
              clk,
              rst,
              x_datawidth,
              x_point,
              x_signed,
              y_datawidth,
              y_point,
              y_signed,
              div_width=None,
              div_point=None,
              div_signed=None):
    """Build a stream that sinks ``Div(x, y)`` as 'z'.

    ``div_width`` and ``div_signed`` override the quotient's width and
    signedness when they are not None.  ``div_point`` inserts a ``Cast``
    when it is given and differs from ``max(x_point, y_point)``.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('div_const')
    max_width = max(x_datawidth, y_datawidth)
    max_point = max(x_point, y_point)

    stream = vthread.Stream(m, name, clk, rst, max_width)
    dividend = stream.source('x', x_datawidth, x_point, x_signed)
    divisor = stream.source('y', y_datawidth, y_point, y_signed)

    quotient = stream.Div(dividend, divisor)
    if div_width is not None:
        quotient.width = div_width
    if div_signed is not None:
        quotient.signed = div_signed

    needs_cast = div_point is not None and max_point != div_point
    if needs_cast:
        quotient = stream.Cast(quotient, point=div_point)

    stream.sink(quotient, 'z')
    return stream
Example #6
0
def mkLed():
    """Build the 'blinkled' module: a reduce-add stream checked against a loop.

    The module contains an AXI master, two RAMs, a stream that accumulates
    source 'a' over 'size' elements, and a thread that runs the stream,
    runs an equivalent sequential loop, and prints 'OK' or 'NG' depending
    on whether the two results in ram_b match.

    Returns the constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)

    # Stream definition: sum of 'a' over 'size' items; the 'sum' sink is
    # gated by the reduction's valid output.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    size = strm.constant('size')
    sum, sum_valid = strm.ReduceAddValid(a, size)
    strm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    # NOTE(review): the nested functions below are presumably compiled by
    # vthread from their source, so only '#' comments are used inside them.

    def comp_stream(size, offset):
        # Bind the stream to ram_a (input) and ram_b (one output word),
        # then run it and wait for completion.
        strm.set_source('a', ram_a, offset, size)
        strm.set_constant('size', size)
        strm.set_sink('sum', ram_b, offset, 1)
        strm.run()
        strm.join()

    def comp_sequential(size, offset):
        # Reference computation: accumulate ram_a[offset : offset + size]
        # and store the total at ram_b[offset].
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            sum += a
        ram_b.write(offset, sum)

    def check(size, offset_stream, offset_seq):
        # Compare `size` words of ram_b at the two offsets.
        all_ok = True
        for i in range(size):
            st = ram_b.read(i + offset_stream)
            sq = ram_b.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('OK')
        else:
            print('NG')

    def comp(size):
        # NOTE(review): dma_read/dma_write arguments are presumably
        # (ram, local address, global address, length) -- confirm against
        # the vthread.AXIM API.
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_b, offset, 1024, 1)

        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_b, offset, 1024 * 2, 1)

        # Each path produced a single word, so only one word is compared.
        check(1, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # NOTE(review): 32 is presumably forwarded to comp() as `size`.
    fsm = th.start(32)

    return m
Example #7
0
def mac_rshift_round(m, clk, rst,
                     x_datawidth, x_point, x_signed,
                     y_datawidth, y_point, y_signed,
                     mul_width=None, mul_point=None, mul_signed=None,
                     sum_width=None, sum_point=None, sum_signed=None):
    """Build a multiply-accumulate stream with a ``SraRound`` shift.

    The product of sources 'x' and 'y' is passed through ``SraRound`` with
    the unsigned 'rshift' source.  It is then reduced with
    ``ReduceAddValid`` over the 'size' constant.  The sinks are 'sum' and
    'valid'.  The ``mul_*`` arguments adjust the product and the ``sum_*``
    arguments adjust the reduction; each is ignored when None.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('mac')
    max_width = max(x_datawidth, y_datawidth)
    max_point = max(x_point, y_point)

    stream = vthread.Stream(m, name, clk, rst, max_width)
    lhs = stream.source('x', x_datawidth, x_point, x_signed)
    rhs = stream.source('y', y_datawidth, y_point, y_signed)

    # The shift amount is one bit wider than ceil(log2(max_width)).
    shamt = stream.source('rshift', signed=False)
    shamt.width = int(math.ceil(math.log(max_width, 2))) + 1
    length = stream.constant('size', signed=False)

    prod = lhs * rhs
    prod.latency = 4
    if mul_width is not None:
        prod.width = mul_width
    if mul_signed is not None:
        prod.signed = mul_signed
    cast_product = mul_point is not None and max_point != mul_point
    if cast_product:
        prod = stream.Cast(prod, point=mul_point)
    prod = stream.SraRound(prod, shamt)

    total, done = stream.ReduceAddValid(
        prod, length, width=sum_width, signed=sum_signed)
    cast_total = sum_point is not None and max_point != sum_point
    if cast_total:
        total = stream.Cast(total, point=sum_point)

    stream.sink(total, 'sum')
    stream.sink(done, 'valid')
    return stream
Example #8
0
def mul_rshift_clip(m,
                    clk,
                    rst,
                    x_datawidth,
                    x_point,
                    x_signed,
                    y_datawidth,
                    y_point,
                    y_signed,
                    mul_width=None,
                    mul_point=None,
                    mul_signed=None,
                    out_width=None,
                    out_point=None,
                    out_signed=None,
                    asymmetric_clip=False):
    """Build a stream that multiplies, right-shifts and clips to a range.

    ``Sra(x * y, rshift)`` is clamped between two thresholds derived from
    ``out_width`` and ``out_point``, and the result is sunk as 'z'.  With
    ``asymmetric_clip`` the lower bound is one step further from zero than
    the upper bound.

    ``out_width`` and ``out_point`` default to None but are used in
    integer arithmetic when the thresholds are computed, so callers have
    to pass integers for both.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('mul_rshift_clip')
    max_width = max(x_datawidth, y_datawidth)
    max_point = max(x_point, y_point)

    stream = vthread.Stream(m, name, clk, rst, max_width)
    lhs = stream.source('x', x_datawidth, x_point, x_signed)
    rhs = stream.source('y', y_datawidth, y_point, y_signed)

    # The shift amount is one bit wider than ceil(log2(max_width)).
    shamt = stream.source('rshift', signed=False)
    shamt.width = int(math.ceil(math.log(max_width, 2))) + 1

    val = lhs * rhs
    val.latency = 4
    if mul_width is not None:
        val.width = mul_width
    if mul_signed is not None:
        val.signed = mul_signed
    cast_product = mul_point is not None and max_point != mul_point
    if cast_product:
        val = stream.Cast(val, point=mul_point)
    val = stream.Sra(val, shamt)

    # Thresholds for an out_width-bit range, then shifted by out_point.
    upper = (1 << (out_width - 1)) - 1
    lower = -1 * upper - 1 if asymmetric_clip else -1 * upper
    upper = upper >> out_point
    lower = lower >> out_point

    # Clamp from above and below separately; pick by the sign of the value.
    clipped_hi = stream.Mux(val > upper, upper, val)
    clipped_lo = stream.Mux(val < lower, lower, val)
    val = stream.Mux(val >= 0, clipped_hi, clipped_lo)

    if out_width is not None:
        val.width = out_width
    if out_signed is not None:
        val.signed = out_signed
    cast_output = out_point is not None and val.point != out_point
    if cast_output:
        val = stream.Cast(val, point=out_point)

    stream.sink(val, 'z')
    return stream
Example #9
0
def madd_rshift_clip(m,
                     clk,
                     rst,
                     x_datawidth,
                     x_point,
                     x_signed,
                     y_datawidth,
                     y_point,
                     y_signed,
                     z_datawidth,
                     z_point,
                     z_signed,
                     sum_width=None,
                     sum_point=None,
                     sum_signed=None,
                     out_width=None,
                     out_point=None,
                     out_signed=None):
    """Build a stream that applies Madd, right-shifts and clips to a range.

    ``Sra(Madd(x, y, z), rshift)`` is clamped between two symmetric
    thresholds derived from ``out_width`` and ``out_point``, and the
    result is sunk as 'sum'.

    ``sum_width`` and ``sum_signed`` override the Madd result's width and
    signedness when they are not None.  ``sum_point`` inserts a ``Cast``
    when it is given and differs from ``max(x_point, y_point, z_point)``.

    ``out_width`` and ``out_point`` default to None but are used in
    integer arithmetic when the thresholds are computed, so callers have
    to pass integers for both.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('madd')
    datawidth = max(x_datawidth, y_datawidth, z_datawidth)
    point = max(x_point, y_point, z_point)

    stream = vthread.Stream(m, name, clk, rst, datawidth)
    x = stream.source('x', x_datawidth, x_point, x_signed)
    y = stream.source('y', y_datawidth, y_point, y_signed)
    z = stream.source('z', z_datawidth, z_point, z_signed)
    rshift = stream.source('rshift', signed=False)
    # The shift amount is one bit wider than ceil(log2(datawidth)).
    rshift.width = int(math.ceil(math.log(datawidth, 2))) + 1

    # The local is named `total` so the builtin `sum` is not shadowed.
    total = stream.Madd(x, y, z)
    total.latency = 4
    # The overrides come from the sum_* parameters.  The previous code read
    # undefined mul_* names here and raised NameError on every call.
    if sum_width is not None:
        total.width = sum_width
    if sum_signed is not None:
        total.signed = sum_signed
    if sum_point is not None and point != sum_point:
        total = stream.Cast(total, point=sum_point)
    total = stream.Sra(total, rshift)

    # Symmetric thresholds for an out_width-bit range, shifted by out_point.
    p_th = (1 << (out_width - 1)) - 1
    n_th = -1 * p_th
    p_th = p_th >> out_point
    n_th = n_th >> out_point

    # Clamp from above and below separately; pick by the sign of the value.
    p = stream.Mux(total > p_th, p_th, total)
    n = stream.Mux(total < n_th, n_th, total)
    total = stream.Mux(total >= 0, p, n)

    if out_width is not None:
        total.width = out_width
    if out_signed is not None:
        total.signed = out_signed
    if out_point is not None and total.point != out_point:
        total = stream.Cast(total, point=out_point)

    stream.sink(total, 'sum')
    return stream
Example #10
0
def average(m, clk, rst,
            datawidth, point, signed, num_vars):
    """Build a stream that sinks ``Average`` of ``num_vars`` sources as 'val'.

    The sources are named 'var0' .. 'var<num_vars - 1>' and all share the
    same width, point and signedness.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('average')
    stream = vthread.Stream(m, name, clk, rst, datawidth)

    inputs = []
    for idx in range(num_vars):
        inputs.append(stream.source('var%d' % idx, datawidth, point, signed))

    mean = stream.Average(*inputs)
    stream.sink(mean, 'val')
    return stream
Example #11
0
def add_tree_rshift_round(m, clk, rst,
                          datawidth, point, signed, num_vars):
    """Build a stream that sums ``num_vars`` sources and applies ``SraRound``.

    Sources 'var0' .. 'var<num_vars - 1>' are combined with ``Add3Tree``.
    The result is passed through ``SraRound`` with the unsigned 'rshift'
    source and sunk as 'sum'.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('add_tree')
    stream = vthread.Stream(m, name, clk, rst, datawidth)

    inputs = []
    for idx in range(num_vars):
        inputs.append(stream.source('var%d' % idx, datawidth, point, signed))

    # The shift amount is one bit wider than ceil(log2(datawidth)).
    shamt = stream.source('rshift', signed=False)
    shamt.width = int(math.ceil(math.log(datawidth, 2))) + 1

    total = Add3Tree(stream, *inputs)
    total = stream.SraRound(total, shamt)
    stream.sink(total, 'sum')
    return stream
Example #12
0
def add_tree(m, clk, rst,
             datawidth, point, signed, num_vars):
    """Build a stream that sinks the sum of ``num_vars`` sources as 'sum'.

    With exactly one source the value goes through ``stream.Cast`` with no
    extra arguments; otherwise the sources are combined with ``Add3Tree``.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('add_tree')
    stream = vthread.Stream(m, name, clk, rst, datawidth)

    inputs = []
    for idx in range(num_vars):
        inputs.append(stream.source('var%d' % idx, datawidth, point, signed))

    if len(inputs) != 1:
        total = Add3Tree(stream, *inputs)
    else:
        total = stream.Cast(inputs[0])

    stream.sink(total, 'sum')
    return stream
Example #13
0
def lshift_rshift(m,
                  clk,
                  rst,
                  x_datawidth,
                  x_point,
                  x_signed,
                  y_datawidth,
                  y_point,
                  y_signed,
                  mul_width=None,
                  mul_point=None,
                  mul_signed=None):
    """Build a stream that left-shifts 'x' by |y|, negates it, then shifts right.

    'x' is shifted left by ``Abs(y)`` and replaced by its ``Complement2``
    when ``Sign(y)`` selects it.  The result is passed through
    ``SraRound`` with the unsigned 'rshift' source and sunk as 'z'.  Every
    intermediate node before the final shift has its latency set to 0.

    Raises:
        ValueError: if ``y_point`` is not 0.

    Returns the constructed ``vthread.Stream``.
    """
    if y_point != 0:
        raise ValueError('not supported')

    name = _tmp_name('lshift')
    max_width = max(x_datawidth, y_datawidth)
    max_point = max(x_point, y_point)

    stream = vthread.Stream(m, name, clk, rst, max_width)
    value = stream.source('x', x_datawidth, x_point, x_signed)
    amount = stream.source('y', y_datawidth, y_point, y_signed)

    # The shift amount is one bit wider than ceil(log2(max_width)).
    shamt = stream.source('rshift', signed=False)
    shamt.width = int(math.ceil(math.log(max_width, 2))) + 1

    magnitude = stream.Abs(amount)
    negative = stream.Sign(amount)

    out = stream.Sll(value, magnitude)
    out.latency = 0
    out = stream.Cast(out, signed=x_signed)
    out.latency = 0

    # Select the two's complement when the sign of 'y' is set.
    negated = stream.Complement2(out)
    out = stream.Mux(negative, negated, out)
    out.latency = 0
    out = stream.Cast(out, signed=x_signed)
    out.latency = 0

    if mul_width is not None:
        out.width = mul_width
    if mul_signed is not None:
        out.signed = mul_signed
    needs_cast = mul_point is not None and max_point != mul_point
    if needs_cast:
        out = stream.Cast(out, point=mul_point)
        out.latency = 0

    out = stream.SraRound(out, shamt)
    stream.sink(out, 'z')
    return stream
Example #14
0
def updown_mask_rshift(m,
                       clk,
                       rst,
                       x_datawidth,
                       x_point,
                       x_signed,
                       y_datawidth,
                       y_point,
                       y_signed,
                       mul_width=None,
                       mul_point=None,
                       mul_signed=None):
    """Build a stream that passes, negates or zeroes 'x' by the sign of 'y'.

    The selected value is 'x' when ``y > 0``, ``Complement2(x)`` when
    ``y < 0`` and 0 otherwise.  It is passed through ``SraRound`` with the
    unsigned 'rshift' source and sunk as 'z'.

    Raises:
        ValueError: unless 'y' is a signed 2-bit value with point 0.

    Returns the constructed ``vthread.Stream``.
    """
    # Only a signed 2-bit integer 'y' is accepted.
    if y_datawidth != 2 or y_point != 0 or not y_signed:
        raise ValueError('not supported')

    name = _tmp_name('updown_mask')
    max_width = max(x_datawidth, y_datawidth)
    max_point = max(x_point, y_point)

    stream = vthread.Stream(m, name, clk, rst, max_width)
    value = stream.source('x', x_datawidth, x_point, x_signed)
    mask = stream.source('y', y_datawidth, y_point, y_signed)

    # The shift amount is one bit wider than ceil(log2(max_width)).
    shamt = stream.source('rshift', signed=False)
    shamt.width = int(math.ceil(math.log(max_width, 2))) + 1

    is_positive = mask > 0
    is_negative = mask < 0
    negated = stream.Complement2(value)
    neg_or_zero = stream.Mux(is_negative, negated, 0)
    out = stream.Mux(is_positive, value, neg_or_zero)
    out.latency = 0
    out = stream.Cast(out, signed=x_signed)
    out.latency = 0

    if mul_width is not None:
        out.width = mul_width
    if mul_signed is not None:
        out.signed = mul_signed
    needs_cast = mul_point is not None and max_point != mul_point
    if needs_cast:
        out = stream.Cast(out, point=mul_point)
        out.latency = 0

    out = stream.SraRound(out, shamt)
    stream.sink(out, 'z')
    return stream
Example #15
0
def acc(m, clk, rst,
        datawidth, point, signed,
        sum_width=None, sum_point=None, sum_signed=None):
    """Build an accumulator stream over source 'x'.

    'x' is reduced with ``ReduceAddValid`` over the 'size' constant.  The
    sinks are 'sum' (the reduction result) and 'valid' (its valid signal).
    ``sum_point`` inserts a ``Cast`` when it is given and differs from
    ``point``.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('acc')
    stream = vthread.Stream(m, name, clk, rst, datawidth)

    data_in = stream.source('x', datawidth, point, signed)
    length = stream.constant('size', signed=False)

    total, done = stream.ReduceAddValid(
        data_in, length, width=sum_width, signed=sum_signed)
    needs_cast = sum_point is not None and point != sum_point
    if needs_cast:
        total = stream.Cast(total, point=sum_point)

    stream.sink(total, 'sum')
    stream.sink(done, 'valid')
    return stream
Example #16
0
def add_tree_rshift_round_frac(m, clk, rst,
                               datawidth, point, signed, num_vars):
    """Build a stream that sums sources plus a rounding term, then shifts.

    Sources 'var0' .. 'var<num_vars - 1>' and the term
    ``Mux(rshift > 0, Sll(1, rshift - 1), 0)`` are combined with
    ``Add3Tree``.  The result is passed through ``Sra`` with the unsigned
    'rshift' source and sunk as 'sum'.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('add_tree')
    stream = vthread.Stream(m, name, clk, rst, datawidth)

    inputs = []
    for idx in range(num_vars):
        inputs.append(stream.source('var%d' % idx, datawidth, point, signed))

    # The shift amount is one bit wider than ceil(log2(datawidth)).
    shamt = stream.source('rshift', signed=False)
    shamt.width = int(math.ceil(math.log(datawidth, 2))) + 1

    # Rounding term: 1 << (rshift - 1) when rshift is non-zero, else 0.
    shift_nonzero = shamt > 0
    half_unit = stream.Sll(1, shamt - 1)
    rounding = stream.Mux(shift_nonzero, half_unit, 0)
    rounding.width = datawidth

    operands = inputs + [rounding]
    total = Add3Tree(stream, *operands)
    total = stream.Sra(total, shamt)
    stream.sink(total, 'sum')
    return stream
Example #17
0
def div_const_frac(m,
                   clk,
                   rst,
                   x_datawidth,
                   x_point,
                   x_signed,
                   y_datawidth,
                   y_point,
                   y_signed,
                   div_width=None,
                   div_point=None,
                   div_signed=None):
    """Build a stream that sinks ``Div(x + signed_frac, y)`` as 'z'.

    The 'frac' source is added to 'x' when ``x >= 0`` and its negation
    (``Uminus``) is added otherwise, before the division by 'y'.
    ``div_width`` and ``div_signed`` override the quotient's width and
    signedness when they are not None.  ``div_point`` inserts a ``Cast``
    when it is given and differs from ``max(x_point, y_point)``.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('div_const_frac')
    max_width = max(x_datawidth, y_datawidth)
    max_point = max(x_point, y_point)

    stream = vthread.Stream(m, name, clk, rst, max_width)
    dividend = stream.source('x', x_datawidth, x_point, x_signed)
    divisor = stream.source('y', y_datawidth, y_point, y_signed)

    offset = stream.source('frac')
    offset.width = max_width

    neg_offset = stream.Uminus(offset)
    neg_offset.width = max_width
    neg_offset.latency = 0

    # Give the offset the same sign as the dividend.
    signed_offset = stream.Mux(dividend >= 0, offset, neg_offset)
    signed_offset.latency = 0
    signed_offset.width = max_width

    adjusted = stream.Add(dividend, signed_offset)
    adjusted.latency = 0

    quotient = stream.Div(adjusted, divisor)
    if div_width is not None:
        quotient.width = div_width
    if div_signed is not None:
        quotient.signed = div_signed
    needs_cast = div_point is not None and max_point != div_point
    if needs_cast:
        quotient = stream.Cast(quotient, point=div_point)

    stream.sink(quotient, 'z')
    return stream
Example #18
0
def mul_rshift_round_madd(m,
                          clk,
                          rst,
                          x_datawidth,
                          x_point,
                          x_signed,
                          y_datawidth,
                          y_point,
                          y_signed,
                          mul_width=None,
                          mul_point=None,
                          mul_signed=None):
    """Build a stream that sinks ``Sra(Madd(x, y, signed_frac), rshift)`` as 'z'.

    The rounding term is ``Mux(rshift > 0, Sll(1, rshift - 1), 0)``.  It is
    used as-is when ``x >= 0`` and negated with ``Uminus`` otherwise, then
    fed to ``Madd`` as the addend.  The ``mul_*`` arguments adjust the Madd
    result when they are not None.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('mul')
    max_width = max(x_datawidth, y_datawidth)
    max_point = max(x_point, y_point)

    stream = vthread.Stream(m, name, clk, rst, max_width)
    lhs = stream.source('x', x_datawidth, x_point, x_signed)
    rhs = stream.source('y', y_datawidth, y_point, y_signed)

    # The shift amount is one bit wider than ceil(log2(max_width)).
    shamt = stream.source('rshift', signed=False)
    shamt.width = int(math.ceil(math.log(max_width, 2))) + 1

    # Rounding term: 1 << (rshift - 1) when rshift is non-zero, else 0.
    shift_nonzero = shamt > 0
    half_unit = stream.Sll(1, shamt - 1)
    rounding = stream.Mux(shift_nonzero, half_unit, 0)
    rounding.width = mul_width

    neg_rounding = stream.Uminus(rounding)
    neg_rounding.width = max_width
    neg_rounding.latency = 0

    # Give the rounding term the same sign as 'x'.
    signed_rounding = stream.Mux(lhs >= 0, rounding, neg_rounding)
    signed_rounding.latency = 0
    signed_rounding.width = max_width

    out = stream.Madd(lhs, rhs, signed_rounding)
    out.latency = 4
    if mul_width is not None:
        out.width = mul_width
    if mul_signed is not None:
        out.signed = mul_signed
    needs_cast = mul_point is not None and max_point != mul_point
    if needs_cast:
        out = stream.Cast(out, point=mul_point)

    out = stream.Sra(out, shamt)
    stream.sink(out, 'z')
    return stream
Example #19
0
def reduce_max(m, clk, rst, datawidth, point, signed):
    """Build a stream that reduces source 'x' to its maximum.

    The reduction uses ``ReduceCustomValid`` with a ``vg.Mux(a > b, a, b)``
    combiner over the 'size' constant.  The initial value is
    ``-2**(datawidth - 1)`` for signed data and 0 otherwise.  The sinks are
    'data' and 'valid'.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('_reduce_max')

    stream = vthread.Stream(m, name, clk, rst, datawidth)
    data_in = stream.source('x', datawidth, point, signed)
    length = stream.constant('size', signed=False)

    def func(a, b):
        # Keep the larger of the two operands.
        return vg.Mux(a > b, a, b)

    initval = -2**(datawidth - 1) if signed else 0

    result, done = stream.ReduceCustomValid(func, data_in, length, initval)

    stream.sink(result, 'data')
    stream.sink(done, 'valid')

    return stream
Example #20
0
def reduce_max(m, clk, rst, datawidth, point, signed):
    """Build a stream that reduces source 'x' with ``ReduceMaxValid``.

    The reduction runs over the 'size' constant.  The initial value is
    ``-2**(datawidth - 1)`` for signed data and 0 otherwise.  The sinks are
    'data' and 'valid'.

    Returns the constructed ``vthread.Stream``.
    """
    name = _tmp_name('_reduce_max')

    stream = vthread.Stream(m, name, clk, rst, datawidth)
    data_in = stream.source('x', datawidth, point, signed)
    length = stream.constant('size', signed=False)

    initval = -2**(datawidth - 1) if signed else 0

    reduce_opts = dict(initval=initval, width=datawidth, signed=signed)
    result, done = stream.ReduceMaxValid(data_in, length, **reduce_opts)

    stream.sink(result, 'data')
    stream.sink(done, 'valid')

    return stream
def mkLed():
    """Build the 'blinkled' module: an a + b stream fed by source patterns.

    Both stream sources are configured with the pattern ``[(size, 0)]``
    starting at ``offset + 10``.  The sequential reference reads that same
    single address on every iteration.  The thread runs both versions,
    compares ``size`` words of ram_c and prints the verification result.

    Returns the constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    size = 16
    # NOTE(review): presumably a list of (count, stride) pairs, so this
    # means `size` reads with stride 0 -- confirm against the vthread API.
    pattern = [(size, 0)]

    # Stream definition: element-wise sum of sources 'a' and 'b'.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    sum = a + b
    strm.sink(sum, 'sum')

    # NOTE(review): the nested functions below are presumably compiled by
    # vthread from their source, so only '#' comments are used inside them.

    def comp_stream(offset):
        # Bind both sources via the pattern, write `size` results to ram_c,
        # then run the stream and wait for completion.
        strm.set_source_pattern('a', ram_a, offset + 10, pattern)
        strm.set_source_pattern('b', ram_b, offset + 10, pattern)
        strm.set_sink('sum', ram_c, offset, size)
        strm.run()
        strm.join()

    def comp_sequential(offset):
        # Reference computation: the read address is fixed at offset + 10
        # on every iteration; only the write address advances.
        sum = 0
        for i in range(size):
            a = ram_a.read(offset + 10)
            b = ram_b.read(offset + 10)
            sum = a + b
            ram_c.write(i + offset, sum)

    def check(size, offset_stream, offset_seq):
        # Compare `size` words of ram_c at the two offsets.
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp():
        # NOTE(review): dma_read/dma_write arguments are presumably
        # (ram, local address, global address, length) -- confirm against
        # the vthread.AXIM API.
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 4, 1)

        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 8, 1)

        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start()

    return m
Example #22
0
def mkLed():
    """Build the 'blinkled' module: an a + b stream using multi-level patterns.

    Source and sink access patterns are derived from ``shape`` and
    ``order`` by ``to_pattern``.  The thread runs the stream, runs a
    linear sequential loop over the same number of elements, and compares
    one word of each result.

    Returns the constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    shape = [16, 4, 8]
    # Total element count: the product of all dimensions.
    size = functools.reduce(lambda x, y: x * y, shape, 1)
    order = [1, 2, 0]

    def to_pattern(shape, order):
        # For each dimension index in `order`, emit (extent, stride), where
        # the stride is the product of the extents of all later dimensions.
        pattern = []
        for p in order:
            size = shape[p]
            stride = functools.reduce(lambda x, y: x * y, shape[p + 1:], 1)
            pattern.append((size, stride))
        return pattern

    pattern_a = to_pattern(shape, order)
    pattern_b = to_pattern(shape, order)
    pattern_c = to_pattern(shape, order)

    # Stream definition: element-wise sum of sources 'a' and 'b'.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a + b
    strm.sink(c, 'c')

    # NOTE(review): the nested functions below are presumably compiled by
    # vthread from their source, so only '#' comments are used inside them.

    def comp_stream(offset):
        # Bind sources and sink through their patterns, then run and wait.
        strm.set_source_pattern('a', ram_a, offset, pattern_a)
        strm.set_source_pattern('b', ram_b, offset, pattern_b)
        strm.set_sink_pattern('c', ram_c, offset, pattern_c)
        strm.run()
        strm.join()

    def comp_sequential(offset):
        # Reference computation: a linear pass over `size` elements.
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b
            ram_c.write(i + offset, sum)

    def check(offset_stream, offset_seq):
        # Only the single word at each offset is compared.
        all_ok = True
        st = ram_c.read(offset_stream)
        sq = ram_c.read(offset_seq)
        if vthread.verilog.NotEql(st, sq):
            all_ok = False

        if all_ok:
            print('OK')
        else:
            print('NG')

    def comp():
        # NOTE(review): dma_read/dma_write arguments are presumably
        # (ram, local address, global address, length) -- confirm against
        # the vthread.AXIM API.
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 4, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 8, 1)

        # verification
        check(0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start()

    return m
Example #23
0
def mkLed():
    """Build the 'blinkled' module: a stream exercising ``Counter`` variants.

    The stream output is ``a + b - a - b`` plus six counters created with
    different ``initval`` / ``size`` / ``interval`` / ``step`` settings.
    ``comp_sequential`` spells out the expected value of each counter per
    element, and the thread compares the two results word by word.

    Returns the constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # Stream definition.  The software model for each counter is the
    # matching cntN expression in comp_sequential below.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    cnt1 = strm.Counter()
    cnt2 = strm.Counter(initval=1)
    cnt3 = strm.Counter(initval=2, size=5)
    cnt4 = strm.Counter(initval=3, interval=3)
    cnt5 = strm.Counter(initval=4, interval=3, size=7)
    cnt6 = strm.Counter(initval=4, step=2, interval=2)
    a = strm.source('a')
    b = strm.source('b')
    # a and b cancel out, so the output is the sum of the counters.
    c = a + b - a - b + cnt1 + cnt2 + cnt3 + cnt4 + cnt5 + cnt6
    strm.sink(c, 'c')

    # NOTE(review): the nested functions below are presumably compiled by
    # vthread from their source, so only '#' comments are used inside them.

    def comp_stream(size, offset):
        # Bind sources and sink to the RAMs, then run and wait.
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    def comp_sequential(size, offset):
        # Reference computation: `cnt` is the element index, and each cntN
        # reproduces the corresponding stream counter.
        cnt = 0
        for i in range(size):
            cnt1 = cnt
            cnt2 = 1 + cnt
            cnt3 = (cnt + 2) % 5
            cnt4 = (cnt // 3) + 3
            cnt5 = ((cnt // 3) + 4) % 7
            cnt6 = (cnt // 2) * 2 + 4
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b - a - b + cnt1 + cnt2 + cnt3 + cnt4 + cnt5 + cnt6
            ram_c.write(i + offset, sum)
            cnt += 1

    def check(size, offset_stream, offset_seq):
        # Compare `size` words of ram_c at the two offsets.
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp(size):
        # NOTE(review): dma_read/dma_write arguments are presumably
        # (ram, local address, global address, length) -- confirm against
        # the vthread.AXIM API.
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # NOTE(review): 32 is presumably forwarded to comp() as `size`.
    fsm = th.start(32)

    return m
Example #24
0
def mkLed():
    """Build the 'blinkled' module: one multiplier stream shared as a substream.

    Three streams are defined:

    * ``mulstrm``: z = x * y.
    * ``macstrm``: sum over 'size' of (a + 1) * (b + 1), with the multiply
      done by ``mulstrm`` as a substream.
    * ``actstrm``: sum over 'size' of (a + 2) * (b + 2) through the same
      substream, with the final sum replaced by 0 unless it is > 0.

    The thread runs mul, mac, act, then mac and act a second time.  Each
    run is compared against a sequential reference and prints 'OK' or
    'NG'.

    Returns the constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # Multiplier stream; also used as a substream by the two streams below.
    mulstrm = vthread.Stream(m, 'mul_stream', clk, rst)
    mulx = mulstrm.source('x')
    muly = mulstrm.source('y')
    mulz = mulx * muly
    mulstrm.sink(mulz, 'z')

    # MAC stream: increment both inputs once, multiply through the
    # substream, then reduce-add over 'size' elements.
    macstrm = vthread.Stream(m, 'mac_stream', clk, rst)
    a = macstrm.source('a')
    b = macstrm.source('b')
    a = a + 1
    b = b + 1
    sub = macstrm.substream(mulstrm)
    sub.to_source('x', a)
    sub.to_source('y', b)
    c = sub.from_sink('z')
    size = macstrm.constant('size')
    sum, sum_valid = macstrm.ReduceAddValid(c, size)
    macstrm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    # ACT stream: increment both inputs twice, multiply through the
    # substream, reduce-add, then replace a non-positive sum with 0.
    actstrm = vthread.Stream(m, 'act_stream', clk, rst)
    a = actstrm.source('a')
    b = actstrm.source('b')
    a = a + 1
    b = b + 1
    a = a + 1
    b = b + 1
    sub = actstrm.substream(mulstrm)
    sub.to_source('x', a)
    sub.to_source('y', b)
    c = sub.from_sink('z')
    size = actstrm.constant('size')
    sum, sum_valid = actstrm.ReduceAddValid(c, size)
    sum = actstrm.Mux(sum > 0, sum, 0)
    actstrm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    # NOTE(review): the nested functions below are presumably compiled by
    # vthread from their source, so only '#' comments are used inside them.

    def comp_stream_mul(size, offset):
        # Run the multiplier stream over `size` elements.
        mulstrm.set_source('x', ram_a, offset, size)
        mulstrm.set_source('y', ram_b, offset, size)
        mulstrm.set_sink('z', ram_c, offset, size)
        mulstrm.run()
        mulstrm.join()

    def comp_stream_mac(size, offset):
        # Run the MAC stream; it writes a single result word.
        macstrm.set_source('a', ram_a, offset, size)
        macstrm.set_source('b', ram_b, offset, size)
        macstrm.set_constant('size', size)
        macstrm.set_sink('sum', ram_c, offset, 1)
        macstrm.run()
        macstrm.join()

    def comp_stream_act(size, offset):
        # Run the ACT stream; it writes a single result word.
        actstrm.set_source('a', ram_a, offset, size)
        actstrm.set_source('b', ram_b, offset, size)
        actstrm.set_constant('size', size)
        actstrm.set_sink('sum', ram_c, offset, 1)
        actstrm.run()
        actstrm.join()

    def comp_sequential_mul(size, offset):
        # Reference for mulstrm: element-wise product.
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a * b
            ram_c.write(i + offset, sum)

    def comp_sequential_mac(size, offset):
        # Reference for macstrm: sum of (a + 1) * (b + 1).
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset) + 1
            b = ram_b.read(i + offset) + 1
            sum += a * b
        ram_c.write(offset, sum)

    def comp_sequential_act(size, offset):
        # Reference for actstrm: sum of (a + 2) * (b + 2), floored at 0.
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset) + 2
            b = ram_b.read(i + offset) + 2
            sum += a * b
        if sum <= 0:
            sum = 0
        ram_c.write(offset, sum)

    def check(size, offset_stream, offset_seq):
        # Compare `size` words of ram_c at the two offsets and print each
        # mismatching index with both values.
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
                print(i, st, sq)
        if all_ok:
            print('OK')
        else:
            print('NG')

    def comp(size):
        # NOTE(review): dma_read/dma_write arguments are presumably
        # (ram, local address, global address, length) -- confirm against
        # the vthread.AXIM API.
        # mul
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_mul(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_mul(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# MUL')
        check(size, 0, offset)

        # mac
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# MAC')
        check(1, 0, offset)

        # act
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# ACT')
        check(1, 0, offset)

        # mac 2
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# MAC')
        check(1, 0, offset)

        # act 2
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# ACT')
        check(1, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # NOTE(review): 32 is presumably forwarded to comp() as `size`.
    fsm = th.start(32)

    return m
Example #25
0
def mkLed():
    """Build the 'blinkled' module that checks a CounterValid stream.

    The module holds an AXI master (``myaxi``), three RAMs and a stream
    ``mystream`` that sinks ``a + b + cntval``, where ``cntval`` is
    ``Mux(valid, 1000, cnt)`` and ``cnt, valid`` come from
    ``CounterValid(size)``.  A thread ``th_comp`` runs the stream, then a
    sequential loop intended to model the same computation, and compares the
    two result regions of ``ram_c``.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # Stream definition: c = a + b + cntval.
    # 'size' is a stream constant that is set at run time by comp_stream().
    strm = vthread.Stream(m, 'mystream', clk, rst)
    size = strm.constant('size')
    cnt, valid = strm.CounterValid(size)
    a = strm.source('a')
    b = strm.source('b')
    # When 'valid' is set the literal 1000 is added instead of the counter.
    cntval = strm.Mux(valid, 1000, cnt)
    c = a + b + cntval
    strm.sink(c, 'c')

    # Run the stream over `size` words of ram_a/ram_b starting at `offset`,
    # writing `size` words to ram_c at the same offset.  The counter limit
    # is set to size // 2.
    def comp_stream(size, offset):
        strm.set_constant('size', size // 2)
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    # Sequential reference: writes a + b + cnt to ram_c for each word, then
    # updates cnt (increment; 1001 -> 0; size // 2 - 1 -> 1000).
    # NOTE(review): presumably this mirrors CounterValid + Mux above -- confirm.
    def comp_sequential(size, offset):
        sum = 0
        cnt = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b + cnt
            cnt += 1
            if cnt == 1001:
                cnt = 0
            if cnt == size // 2 - 1:
                cnt = 1000
            ram_c.write(i + offset, sum)

    # Compare `size` words of ram_c at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread body: stream results go to ram_c[0:size], sequential results to
    # ram_c[size:2*size].
    # NOTE(review): dma_read/dma_write arguments look like
    # (ram, local address, global address, length) -- confirm against AXIM.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 32 is the argument handed to comp() as `size`.
    fsm = th.start(32)

    return m
Example #26
0
def mkLed():
    """Build the 'blinkled' module that checks a conditionally-written sink.

    The stream ``mystream`` computes ``c = a + b``, derives the condition
    ``v = Ands(c > 140, c < 150)``, sinks ``c`` only ``when=v`` and also
    sinks ``cnt = ReduceAdd(v)``.  A thread ``th_comp`` runs the stream and a
    sequential loop that keeps only the values in the same range, then
    compares the two result regions of ``ram_c``.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # Stream definition: 'c' is written only when v holds; 'cnt' is the
    # ReduceAdd of v (presumably the number of matching elements -- confirm).
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a + b
    v = strm.Ands(c > 140, c < 150)
    cnt = strm.ReduceAdd(v)
    strm.sink(c, 'c', when=v, when_name='v')
    strm.sink(cnt, 'cnt')

    # Run the stream over `size` input words and return the value read back
    # from the 'cnt' sink.  The sink sizes are passed as 0 (see the original
    # "max_size" remarks); the number of words written depends on `v`.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, 0)  # max_size
        strm.set_sink_immediate('cnt', 0)  # max_size
        strm.run()
        strm.join()
        cnt = strm.read_sink('cnt')
        print('# num of counted: %d' % cnt)
        return cnt

    # Sequential reference: packs every c with 140 < c < 150 contiguously
    # into ram_c starting at `offset` and returns how many were written.
    def comp_sequential(size, offset):
        sum = 0
        addr = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            c = a + b
            if c > 140 and c < 150:
                ram_c.write(addr + offset, c)
                addr += 1
        print('# num of counted: %d' % addr)
        return addr

    # Compare `size` words of ram_c at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread body.  `cnt` is reassigned by the sequential pass, so the
    # verification below uses the sequential count for both regions.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        cnt = comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, cnt)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        cnt = comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, cnt)

        # verification
        # Both result sets are read back through the AXI master before
        # being compared.
        myaxi.dma_read(ram_c, 0, 1024, cnt)
        myaxi.dma_read(ram_c, offset, 1024 * 2, cnt)
        check(cnt, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 32 is the argument handed to comp() as `size`.
    fsm = th.start(32)

    return m
Example #27
0
def mkLed(matrix_size=16):
    """Build the 'blinkled' module that multiplies two matrices with a stream.

    A free-running 32-bit ``timer`` register measures the cycles spent in
    ``comp``.  The thread ``th_matmul`` runs ``matmul``, which computes the
    product with the stream ``strm_madd`` and then calls ``check``.

    Args:
        matrix_size: Value passed to the thread as ``matrix_size``; it is used
            both as the row/column loop bound and as the length of each
            transferred vector.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # Cycle counter incremented by the Seq block on every step.
    seq = Seq(m, 'seq', clk, rst)
    timer = m.Reg('timer', 32, initval=0)
    seq(timer.inc())

    datawidth = 32
    addrwidth = 10
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)

    # Thread entry point: time comp(), print the cycle count, then verify.
    def matmul(matrix_size, a_offset, b_offset, c_offset):
        start_time = timer
        comp(matrix_size, a_offset, b_offset, c_offset)
        end_time = timer
        time = end_time - start_time
        print("Time (cycles): %d" % time)
        check(matrix_size, a_offset, b_offset, c_offset)

    # Stream body: reads `size` words from ram_a and ram_b (both from
    # address 0), accumulates a * b with RegionAdd over `size` elements, and
    # writes one word to ram_c[waddr] when `valid` is asserted.
    def strm_madd(strm, size, waddr):
        a = strm.read(ram_a, 0, size)
        b = strm.read(ram_b, 0, size)
        sum, valid = strm.RegionAdd(a * b, size)
        strm.write(ram_c, waddr, 1, sum, when=valid)

    # For each i: load one matrix_size-word vector of A into ram_a; for each
    # j: load the j-th matrix_size-word vector of B into ram_b and run the
    # stream so that ram_c[j] receives the result; finally write ram_c back.
    # Addresses advance by matrix_size * (datawidth // 8), i.e. in bytes.
    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            myaxi.dma_read(ram_a, 0, a_addr, matrix_size)

            b_addr = b_offset
            for j in range(matrix_size):
                myaxi.dma_read(ram_b, 0, b_addr, matrix_size)

                # `stream` is bound further down, before the thread starts.
                stream.run(matrix_size, j)
                stream.join()

                b_addr += matrix_size * (datawidth // 8)

            myaxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    # Read the result back one matrix_size-word vector at a time and expect
    # (i + 1) * 2 on the diagonal and 0 elsewhere.  The input data that
    # yields this result is set up outside this function.  a_offset and
    # b_offset are accepted but not used here.
    def check(matrix_size, a_offset, b_offset, c_offset):
        all_ok = True
        c_addr = c_offset
        for i in range(matrix_size):
            myaxi.dma_read(ram_c, 0, c_addr, matrix_size)
            for j in range(matrix_size):
                v = ram_c.read(j)
                if i == j and vthread.verilog.NotEql(v, (i + 1) * 2):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
                if i != j and vthread.verilog.NotEql(v, 0):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
            c_addr += matrix_size * (datawidth // 8)

        if all_ok:
            print("OK")
        else:
            print("NG")

    stream = vthread.Stream(m, 'strm_madd', clk, rst, strm_madd)
    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    # Thread arguments: matrix_size, a_offset=0, b_offset=1024, c_offset=2048.
    fsm = th.start(matrix_size, 0, 1024, 2048)

    return m
Example #28
0
def mkLed(memory_datawidth=128):
    """Build the 'blinkled' module that runs a stream on multi-bank RAMs.

    Three ``vthread.MultibankRAM`` instances with ``numbanks = 4`` banks of
    32-bit words are connected to an AXI master whose data width is
    ``memory_datawidth``.  The stream ``mystream`` computes ``c = a + b``; a
    thread ``th_comp`` runs it, runs a sequential equivalent, and compares
    the two result regions of ``ram_c``.

    Args:
        memory_datawidth: Data width passed to ``vthread.AXIM``.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    numbanks = 4
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, memory_datawidth)
    ram_a = vthread.MultibankRAM(m,
                                 'ram_a',
                                 clk,
                                 rst,
                                 datawidth,
                                 addrwidth,
                                 numbanks=numbanks)
    ram_b = vthread.MultibankRAM(m,
                                 'ram_b',
                                 clk,
                                 rst,
                                 datawidth,
                                 addrwidth,
                                 numbanks=numbanks)
    ram_c = vthread.MultibankRAM(m,
                                 'ram_c',
                                 clk,
                                 rst,
                                 datawidth,
                                 addrwidth,
                                 numbanks=numbanks)

    # Stream definition: element-wise sum of the two sources.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a + b
    strm.sink(c, 'c')

    # Run the stream over `size` words starting at `offset` in all three RAMs.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    # Sequential reference: ram_c[i + offset] = ram_a[...] + ram_b[...].
    def comp_sequential(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b
            ram_c.write(i + offset, sum)

    # Compare `size` words of ram_c at the two offsets; mismatching entries
    # are printed as (index, stream value, sequential value).
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
                print(i, st, sq)
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread body.  DMA transfers use dma_size/dma_offset, while RAM-side
    # offsets for the second pass use comp_size = size * numbanks.
    # Both ram_a and ram_b are loaded from global address 0.
    # NOTE(review): comp_stream/comp_sequential are called with `size`, but
    # check() compares comp_size words -- confirm this matches the
    # MultibankRAM addressing that is intended.
    def comp(size):
        dma_size = size
        comp_size = size * numbanks

        # stream pass
        dma_offset = 0
        comp_offset = 0
        myaxi.dma_read(ram_a, dma_offset, 0, dma_size)
        myaxi.dma_read(ram_b, dma_offset, 0, dma_size)
        comp_stream(size, comp_offset)
        myaxi.dma_write(ram_c, dma_offset, 1024, dma_size)

        # sequential pass
        dma_offset = size
        comp_offset = comp_size
        myaxi.dma_read(ram_a, dma_offset, 0, dma_size)
        myaxi.dma_read(ram_b, dma_offset, 0, dma_size)
        comp_sequential(size, comp_offset)
        myaxi.dma_write(ram_c, dma_offset, 1024 * 2, dma_size)

        # verification
        check(comp_size, 0, comp_offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 32 is the argument handed to comp() as `size`.
    fsm = th.start(32)

    return m
def mkLed():
    """Build the 'blinkled' module that embeds a MAC stream as a substream.

    ``macstream`` multiplies its two sources and reduces the products with
    ``ReduceAddValid`` using the stream constant ``'const'``, sinking both the
    sum (``'c'``) and the valid flag (``'v'``).  ``mystream`` feeds its own
    sources into ``macstream`` through ``substream`` and sinks ``c + x`` when
    the substream's ``'v'`` output is set.  The thread ``th_comp`` checks
    each stream against a sequential loop.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    # Number of products accumulated per reduction; used as 'const'.
    reduce_size = 4

    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    ram_d = vthread.RAM(m, 'ram_d', clk, rst, datawidth, addrwidth)

    # Inner stream: multiply-accumulate with a valid flag.
    macstrm = vthread.Stream(m, 'macstream', clk, rst)
    macstrm_a = macstrm.source('a')
    macstrm_b = macstrm.source('b')
    macstrm_const = macstrm.constant('const')
    macstrm_mul = macstrm_a * macstrm_b
    macstrm_c, macstrm_v = macstrm.ReduceAddValid(macstrm_mul, macstrm_const)
    # Adding 0 leaves the value unchanged.
    # NOTE(review): presumably inserted to add an operator on the 'v' path
    # -- confirm the intent.
    macstrm_v += 0
    macstrm.sink(macstrm_c, 'c')
    macstrm.sink(macstrm_v, 'v')

    # Outer stream: routes x, y and const into macstream and adds x to the
    # returned sum; the result is written only when the substream's v is set.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    x = strm.source('x')
    y = strm.source('y')
    const = strm.constant('const')
    sub = strm.substream(macstrm)
    sub.to_source('a', x)
    sub.to_source('b', y)
    sub.to_constant('const', const)
    z = sub.from_sink('c')
    v = sub.from_sink('v')
    z = z + x
    strm.sink(z, 'z', when=v, when_name='v')

    # Run macstream directly: `size` sums go to ram_c and `size` valid flags
    # go to ram_d.
    def comp_stream_macstrm(size, offset):
        macstrm.set_source('a', ram_a, offset, size)
        macstrm.set_source('b', ram_b, offset, size)
        macstrm.set_constant('const', reduce_size)
        macstrm.set_sink('c', ram_c, offset, size)
        macstrm.set_sink('v', ram_d, offset, size)
        macstrm.run()
        macstrm.join()

    # Run the outer stream: size // reduce_size results go to ram_c.
    def comp_stream_mystrm(size, offset):
        strm.set_source('x', ram_a, offset, size)
        strm.set_source('y', ram_b, offset, size)
        strm.set_constant('const', reduce_size)
        strm.set_sink('z', ram_c, offset, size // reduce_size)
        strm.run()
        strm.join()

    # Sequential reference for macstream: writes the running sum to ram_c on
    # every element and restarts the sum after reduce_size elements.
    # The value written to ram_d is not compared by check().
    def comp_sequential_macstrm(size, offset):
        sum = 0
        count = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum += a * b
            count += 1
            ram_c.write(i + offset, sum)
            ram_d.write(i + offset, count == (reduce_size - 1))
            if count == reduce_size:
                sum = 0
                count = 0

    # Sequential reference for mystream: every reduce_size elements, writes
    # (accumulated sum + current x) to the next free word of ram_c.
    def comp_sequential_mystrm(size, offset):
        sum = 0
        count = 0
        write_offset = offset
        for i in range(size):
            x = ram_a.read(i + offset)
            y = ram_b.read(i + offset)
            sum += x * y
            val = sum + x
            count += 1
            if count == reduce_size:
                ram_c.write(write_offset, val)
                write_offset += 1
                sum = 0
                count = 0

    # Compare `size` words of ram_c at the two offsets; mismatching entries
    # are printed as (index, stream value, sequential value).
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
                print(i, st, sq)
        if all_ok:
            print('OK')
        else:
            print('NG')

    # Thread body: first verifies macstream, then mystream.  In every pass
    # ram_a and ram_b are both loaded from global address 0.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# macstream')
        check(size, 0, offset)

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size // reduce_size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size // reduce_size)

        # verification
        print('# mystream')
        check(size // reduce_size, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 16 is the argument handed to comp() as `size`.
    fsm = th.start(16)

    return m
Example #30
0
def mkLed():
    """Build the 'blinkled' module that nests substreams three levels deep.

    Stream hierarchy built here (each level embeds the previous one through
    ``substream``):

    * ``macstream``  -- ``ReduceAddValid(a * b, const)`` with sinks 'c', 'v'.
    * ``macstream2`` -- passes ``a + 1 - 1`` and ``b * 1`` to ``macstream``.
    * ``neststream`` -- passes ``a + 1`` and ``b + 1`` to ``macstream2`` and
      adds its own ``a + 1`` to the returned sum.
    * ``mystream``   -- passes ``x``, ``y`` to ``neststream`` and sinks
      ``c + y`` when the returned 'v' is set.

    The thread ``th_comp`` checks ``macstream2`` and ``mystream`` against
    sequential loops, recording the verdict in the register ``all_ok``.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    # Number of products accumulated per reduction; used as 'const'.
    reduce_size = 4

    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    ram_d = vthread.RAM(m, 'ram_d', clk, rst, datawidth, addrwidth)

    # Level 0: multiply-accumulate with a valid flag.
    macstrm = vthread.Stream(m, 'macstream', clk, rst)
    macstrm_a = macstrm.source('a')
    macstrm_b = macstrm.source('b')
    macstrm_const = macstrm.parameter('const')
    macstrm_mul = macstrm_a * macstrm_b
    macstrm_c, macstrm_v = macstrm.ReduceAddValid(macstrm_mul, macstrm_const)
    macstrm.sink(macstrm_c, 'c')
    macstrm.sink(macstrm_v, 'v')

    # Level 1: wraps macstream.  The +1, -1 and *1 operations cancel out
    # arithmetically, so the forwarded values equal the sources.
    macstrm2 = vthread.Stream(m, 'macstream2', clk, rst)
    macstrm2_a = macstrm2.source('a')
    macstrm2_b = macstrm2.source('b')
    macstrm2_const = macstrm2.parameter('const')
    macstrm2_a = macstrm2_a + 1
    macstrm2_a = macstrm2_a - 1
    macstrm2_b = macstrm2_b * 1
    macsub = macstrm2.substream(macstrm)
    macsub.to_source('a', macstrm2_a)
    macsub.to_source('b', macstrm2_b)
    macsub.to_parameter('const', macstrm2_const)
    macstrm2_c = macsub.from_sink('c')
    macstrm2_v = macsub.from_sink('v')
    macstrm2.sink(macstrm2_c, 'c')
    macstrm2.sink(macstrm2_v, 'v')

    # Level 2: wraps macstream2 with a + 1 and b + 1, then adds (a + 1) to
    # the returned sum.  The name `macsub` is rebound here.
    neststrm = vthread.Stream(m, 'neststream', clk, rst)
    neststrm_a = neststrm.source('a')
    neststrm_b = neststrm.source('b')
    neststrm_const = neststrm.parameter('const')
    neststrm_a += 1
    neststrm_a += 0
    neststrm_b += 1
    macsub = neststrm.substream(macstrm2)
    macsub.to_source('a', neststrm_a)
    macsub.to_source('b', neststrm_b)
    macsub.to_parameter('const', neststrm_const)
    neststrm_c = macsub.from_sink('c')
    neststrm_c += neststrm_a
    neststrm_c += 0
    neststrm_v = macsub.from_sink('v')
    neststrm.sink(neststrm_c, 'c')
    neststrm.sink(neststrm_v, 'v')

    # Level 3: wraps neststream and adds y to the returned value; the
    # result is written only when the substream's v is set.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    x = strm.source('x')
    y = strm.source('y')
    const = strm.parameter('const')
    sub = strm.substream(neststrm)
    sub.to_source('a', x)
    sub.to_source('b', y)
    sub.to_parameter('const', const)
    z = sub.from_sink('c')
    v = sub.from_sink('v')
    z = z + y
    strm.sink(z, 'z', when=v, when_name='v')

    # Verdict register shared by comp() and check() through `.value`.
    all_ok = m.TmpReg(initval=0)

    # Run macstream2 (not macstream itself): `size` sums go to ram_c and
    # `size` valid flags go to ram_d.
    def comp_stream_macstrm(size, offset):
        macstrm2.set_source('a', ram_a, offset, size)
        macstrm2.set_source('b', ram_b, offset, size)
        macstrm2.set_parameter('const', reduce_size)
        macstrm2.set_sink('c', ram_c, offset, size)
        macstrm2.set_sink('v', ram_d, offset, size)
        macstrm2.run()
        macstrm2.join()

    # Run the outermost stream: size // reduce_size results go to ram_c.
    def comp_stream_mystrm(size, offset):
        strm.set_source('x', ram_a, offset, size)
        strm.set_source('y', ram_b, offset, size)
        strm.set_parameter('const', reduce_size)
        strm.set_sink('z', ram_c, offset, size // reduce_size)
        strm.run()
        strm.join()

    # Sequential reference for macstream2: writes the running sum to ram_c
    # on every element and restarts the sum after reduce_size elements.
    # The value written to ram_d is not compared by check().
    def comp_sequential_macstrm(size, offset):
        sum = 0
        count = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum += a * b
            count += 1
            ram_c.write(i + offset, sum)
            ram_d.write(i + offset, count == (reduce_size - 1))
            if count == reduce_size:
                sum = 0
                count = 0

    # Sequential reference for mystream: accumulates (x + 1) * (y + 1) and,
    # every reduce_size elements, writes sum + (x + 1) + y to the next free
    # word of ram_c -- the same terms the nested streams add above.
    def comp_sequential_mystrm(size, offset):
        sum = 0
        count = 0
        write_offset = offset
        for i in range(size):
            x = ram_a.read(i + offset)
            y = ram_b.read(i + offset)
            sum += (x + 1) * (y + 1)
            val = sum + (x + 1) + y
            count += 1
            if count == reduce_size:
                ram_c.write(write_offset, val)
                write_offset += 1
                sum = 0
                count = 0

    # Compare `size` words of ram_c at the two offsets.  all_ok is only ever
    # cleared here, so a failure in an earlier call is still reported by
    # later calls.
    def check(size, offset_stream, offset_seq):
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok.value = False
                print(i, st, sq)
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread body: first verifies macstream2, then mystream.  In every pass
    # ram_a and ram_b are both loaded from global address 0.
    def comp(size):
        # Set once; it is not reset between the two verification phases.
        all_ok.value = True

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# macstream')
        check(size, 0, offset)

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size // reduce_size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size // reduce_size)

        # verification
        print('# mystream')
        check(size // reduce_size, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 16 is the argument handed to comp() as `size`.
    fsm = th.start(16)

    return m