def run(a_shape=(15, 15),
        b_shape=(15, 15),
        c_shape=(15, 15),
        a_dtype=ng.int32,
        b_dtype=ng.int32,
        c_dtype=ng.int32,
        d_dtype=ng.int32,
        par=1,
        axi_datawidth=32,
        silent=False,
        filename=None,
        simtype='iverilog',
        outputfile=None):
    """Build, simulate and self-check a multiply -> rshift -> clip design.

    The function
      1. defines three placeholders ``a``, ``b``, ``c`` and the graph
         ``d = clip(rshift(multiply(a, b), c))``,
      2. converts ``d`` to a Veriloggen module with ``ng.to_veriloggen``,
      3. generates input data and evaluates the graph with ``ng.eval`` to
         obtain the expected output,
      4. writes inputs and expected output into a memory image,
      5. builds a test module (AXI memory model, AXI-lite controller and a
         control thread that starts the design, waits for it, and compares
         the produced output with the expected one word by word),
      6. runs the simulation and returns its text output.

    Args:
        a_shape, b_shape, c_shape: Shapes of the three placeholders.
        a_dtype, b_dtype, c_dtype: Data types of the three placeholders.
        d_dtype: Data type of the clipped output.
        par: Parallelism passed to ``ng.multiply`` / ``ng.rshift``; also a
            lower bound for the last argument of ``axi.set_memory``.
        axi_datawidth: Data width used for the 'maxi_datawidth' config entry
            and for the AXI memory model.
        silent: Forwarded to ``ng.to_veriloggen``.
        filename: If not None, the test module is written there as Verilog.
        simtype: Simulator name forwarded to ``simulation.Simulator``.
        outputfile: Output file name forwarded to ``sim.run``. Defaults to
            this script's base name with a '.out' extension. It is also used
            to derive the memory image name ('memimg_' + outputfile).

    Returns:
        The simulator output as returned by ``sim.run``. For
        ``simtype == 'verilator'`` a trailing line starting with '-' is
        removed.
    """

    # create target hardware
    a = ng.placeholder(a_dtype, shape=a_shape, name='a')
    b = ng.placeholder(b_dtype, shape=b_shape, name='b')
    c = ng.placeholder(c_dtype, shape=c_shape, name='c')
    # Product and shifted value are kept at 64 bit; only the final clip
    # narrows the result to d_dtype.
    x = ng.multiply(a, b, par=par, dtype=ng.int64)
    x = ng.rshift(x, c, dtype=ng.int64, par=par)
    d = ng.clip(x, dtype=d_dtype)

    targ = ng.to_veriloggen([d],
                            'matrix_multiply_rshift_clip',
                            silent=silent,
                            config={'maxi_datawidth': axi_datawidth})

    # verification data
    # a: values cycle through 1..4, scaled by 2**(half of a's bit width)
    va = ((np.arange(a.length, dtype=np.int64).reshape(a.shape) % [4] + [1]) *
          [2**(a_dtype.width // 2)])
    # b: values cycle through 1..3, scaled by 2**(half of b's bit width)
    vb = ((np.arange(b.length, dtype=np.int64).reshape(b.shape) % [3] + [1]) *
          [2**(b_dtype.width // 2)])
    # c: a constant shift amount of (a width + b width) / 4 everywhere
    vc = (np.ones(c.length, dtype=np.int64).reshape(c.shape) *
          [(a_dtype.width + b_dtype.width) // 4])

    # Expected output, computed by evaluating the graph on the data above.
    eval_outs = ng.eval([d], a=va, b=vb, c=vc)
    vd = eval_outs[0]

    # to memory image
    # Largest tensor footprint, rounded up to a multiple of 4096.
    size_max = int(
        math.ceil(
            max(a.memory_size, b.memory_size, c.memory_size, d.memory_size) /
            4096)) * 4096
    # The expected output is stored one rounded region past the
    # highest-addressed tensor; tmp_addr follows that region and is later
    # handed to ng.sim.set_global_addrs.
    check_addr = max(a.addr, b.addr, c.addr, d.addr) + size_max
    size_check = size_max
    tmp_addr = check_addr + size_check

    memimg_datawidth = 32
    # 8 MiB worth of memimg_datawidth-bit words, every word preset to 100.
    # NOTE(review): presumably non-zero so untouched memory is recognizable
    # -- confirm.
    mem = np.zeros([1024 * 1024 * 8 // (memimg_datawidth // 8)],
                   dtype=np.int64)
    mem = mem + [100]

    # Inputs go to their own addresses, the expected output to check_addr.
    for values, dtype, addr in ((va, a_dtype, a.addr),
                                (vb, b_dtype, b.addr),
                                (vc, c_dtype, c.addr),
                                (vd, d_dtype, check_addr)):
        axi.set_memory(mem, values, memimg_datawidth, dtype.width, addr,
                       max(int(math.ceil(axi_datawidth / dtype.width)), par))

    # test controller
    m = Module('test')
    params = m.copy_params(targ)
    ports = m.copy_sim_ports(targ)
    clk = ports['CLK']
    resetn = ports['RESETN']
    # The design exposes an active-low RESETN; derive an active-high RST
    # for the testbench components below.
    rst = m.Wire('RST')
    rst.assign(Not(resetn))

    # AXI memory model
    if outputfile is None:
        outputfile = os.path.splitext(os.path.basename(__file__))[0] + '.out'

    memimg_name = 'memimg_' + outputfile

    memory = axi.AxiMemoryModel(m,
                                'memory',
                                clk,
                                rst,
                                datawidth=axi_datawidth,
                                memimg_datawidth=memimg_datawidth,
                                memimg=mem,
                                memimg_name=memimg_name)
    memory.connect(ports, 'maxi')

    # AXI-Slave controller
    _saxi = vthread.AXIMLite(m, '_saxi', clk, rst, noio=True)
    _saxi.connect(ports, 'saxi')

    # timer
    time_counter = m.Reg('time_counter', 32, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(time_counter.inc())

    # NOTE: ctrl is handed to vthread.Thread below; keep its body to the
    # plain constructs already used here.
    def ctrl():
        # Idle for 100 iterations before touching the design.
        # NOTE(review): presumably waits for reset to settle -- confirm.
        for i in range(100):
            pass

        ng.sim.set_global_addrs(_saxi, tmp_addr)

        start_time = time_counter.value
        ng.sim.start(_saxi)

        print('# start')

        ng.sim.wait(_saxi)
        end_time = time_counter.value

        print('# end')
        print('# execution cycles: %d' % (end_time - start_time))

        # verify
        # Compare every output element at d.addr against the expected value
        # stored at check_addr; rows are addressed with the aligned width.
        ok = True
        for i in range(d.shape[0]):
            for j in range(d.shape[1]):
                orig = memory.read_word(i * d.aligned_shape[1] + j, d.addr,
                                        d_dtype.width)
                check = memory.read_word(i * d.aligned_shape[1] + j,
                                         check_addr, d_dtype.width)

                if vthread.verilog.NotEql(orig, check):
                    print('NG', i, j, orig, check)
                    ok = False
                # else:
                #    print('OK', i, j, orig, check)

        if ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

        vthread.finish()

    th = vthread.Thread(m, 'th_ctrl', clk, rst, ctrl)
    fsm = th.start()

    uut = m.Instance(targ,
                     'uut',
                     params=m.connect_params(targ),
                     ports=m.connect_ports(targ))

    # simulation.setup_waveform(m, uut)
    simulation.setup_clock(m, clk, hperiod=5)
    init = simulation.setup_reset(m,
                                  resetn,
                                  m.make_reset(),
                                  period=100,
                                  polarity='low')

    # Safety net: end the simulation after a delay of 1000000 even if the
    # control thread never reaches vthread.finish().
    init.add(
        Delay(1000000),
        Systask('finish'),
    )

    # output source code
    if filename is not None:
        m.to_verilog(filename)

    # run simulation
    sim = simulation.Simulator(m, sim=simtype)
    rslt = sim.run(outputfile=outputfile)
    lines = rslt.splitlines()
    # Drop a trailing line starting with '-' from verilator output. The
    # `lines` check keeps empty simulator output from raising IndexError.
    if simtype == 'verilator' and lines and lines[-1].startswith('-'):
        rslt = '\n'.join(lines[:-1])
    return rslt
# Example no. 2
# 0
    mul_dtype=ng.dtype_int(width=16),
    sum_dtype=ng.dtype_int(width=32),
    name=L045_layer19_conv_cv_cbs_name)
# Register the layer-19 operator in the `operators` mapping under its name.
operators[L045_layer19_conv_cv_cbs_name] = L045_layer19_conv_cv_cbs

# Layer 20: upsample the layer-19 output; the result dtype is an 8-bit integer.
# NOTE(review): [1, 2, 2, 1] is presumably the per-axis scale factor
# (2x on the two middle axes) -- confirm against ng.upsampling2d.
L049_layer20_upsample = ng.upsampling2d(L045_layer19_conv_cv_cbs, [1, 2, 2, 1],
                                        dtype=ng.dtype_int(width=8),
                                        name=L049_layer20_upsample_name)
operators[L049_layer20_upsample_name] = L049_layer20_upsample

# Layer 21 (route): its two inputs are the layer-20 upsample output and the
# layer-9 output.
# NOTE(review): the per-input multiply/shift/clip below presumably brings both
# inputs to a common quantization scale before they are joined -- confirm.
L050_layer21_route_values = [
    L049_layer20_upsample,
    L023_layer9_conv_cv_cbs,
]
# Input 0: multiply by scale0 with a 16-bit integer result ...
L050_layer21_route_op_sc0 = ng.multiply(L050_layer21_route_values[0],
                                        L050_layer21_route_v_scale0,
                                        dtype=ng.dtype_int(width=16),
                                        name=L050_layer21_route_name_scale0)
# ... right-shift with rounding by the route's shift value (still 16 bit) ...
L050_layer21_route_op_sft0 = ng.rshift_round(
    L050_layer21_route_op_sc0,
    L050_layer21_route_v_shift,
    dtype=ng.dtype_int(width=16),
    name=L050_layer21_route_name_shift0)
# ... and clip the result into an 8-bit integer.
L050_layer21_route_op_clp0 = ng.clip(L050_layer21_route_op_sft0,
                                     dtype=ng.dtype_int(width=8),
                                     name=L050_layer21_route_name_clip0)
# Input 1: multiply by its own factor (scale1), again with a 16-bit result.
L050_layer21_route_op_sc1 = ng.multiply(L050_layer21_route_values[1],
                                        L050_layer21_route_v_scale1,
                                        dtype=ng.dtype_int(width=16),
                                        name=L050_layer21_route_name_scale1)
L050_layer21_route_op_sft1 = ng.rshift_round(
    L050_layer21_route_op_sc1,