Python matmul Examples

Programming Language: Python

Namespace/Package Name: nngen

Method/Function: matmul

Examples at hotexamples.com: 5

Python matmul - 5 examples found. These are the top-rated real-world Python examples of nngen.matmul, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

               scale=s1,
               act_func=ng.relu,
               dtype=act_dtype,
               sum_dtype=ng.int32)

a1r = ng.reshape(a1, [batchsize, -1])

# layer 2: full-connection, relu
w2 = ng.variable(weight_dtype, shape=(256, a1r.shape[-1]), name='w2')
b2 = ng.variable(bias_dtype, shape=(w2.shape[0], ), name='b2')
s2 = ng.variable(scale_dtype, shape=(w2.shape[0], ), name='s2')

a2 = ng.matmul(a1r,
               w2,
               bias=b2,
               scale=s2,
               transposed_b=True,
               act_func=ng.relu,
               dtype=act_dtype,
               sum_dtype=ng.int32)

# layer 3: full-connection, relu
w3 = ng.variable(weight_dtype, shape=(10, a2.shape[-1]), name='w3')
b3 = ng.variable(bias_dtype, shape=(w3.shape[0], ), name='b3')
s3 = ng.variable(scale_dtype, shape=(w3.shape[0], ), name='s3')

# output
output_layer = ng.matmul(a2,
                         w3,
                         bias=b3,
                         scale=s3,
                         transposed_b=True,

Example #2

Show file

File: matrix_matmul.py Project: herosugi/nngen

def run(a_shape=(15, 15),
        b_shape=(15, 15),
        bias_shape=None,
        scale_shape=None,
        a_dtype=ng.int32,
        b_dtype=ng.int32,
        bias_dtype=ng.int32,
        scale_dtype=ng.int32,
        c_dtype=ng.int32,
        rshift_mul=None,
        rshift_sum=None,
        rshift_out=None,
        act_func=None,
        par_left_col=1,
        par_left_row=1,
        par_out_col=1,
        concur_out_col=None,
        stationary='right',
        left_ram_size=None,
        right_ram_size=None,
        bias_ram_size=None,
        scale_ram_size=None,
        out_ram_size=None,
        axi_datawidth=32,
        silent=False,
        filename=None,
        simtype='iverilog',
        outputfile=None):
    """Generate a single ``ng.matmul`` design, simulate it and return the log.

    The function builds the NNgen graph, computes reference output with
    ``ng.verify.matmul``, writes inputs and the reference into a memory
    image, wraps the generated design in a ``test`` module and runs the
    simulator.  The control thread prints ``# verify: PASSED`` or
    ``# verify: FAILED`` after comparing every output word with the reference.

    Args:
        a_shape, b_shape: Shapes of the two input placeholders.
        bias_shape, scale_shape: Shapes of the optional bias / scale
            placeholders; ``None`` disables the respective operand.
        a_dtype, b_dtype, bias_dtype, scale_dtype, c_dtype: NNgen dtypes of
            the operands and of the result.
        rshift_mul, rshift_sum, rshift_out, act_func: Forwarded unchanged to
            both ``ng.matmul`` and ``ng.verify.matmul``.
        par_left_col, par_left_row, par_out_col, concur_out_col, stationary:
            Forwarded unchanged to both calls; the ``par_*`` values are also
            used as a lower bound for the last argument of ``axi.set_memory``.
        left_ram_size, right_ram_size, bias_ram_size, scale_ram_size,
        out_ram_size: Forwarded unchanged to both calls.
        axi_datawidth: Used as ``maxi_datawidth`` in the generator config and
            as the data width of the AXI memory model.
        silent: Forwarded to ``ng.to_veriloggen``.
        filename: If not ``None``, the testbench is written there as Verilog.
        simtype: Simulator name passed to ``simulation.Simulator``.
        outputfile: Passed to ``sim.run``; defaults to this file's base name
            with an ``.out`` suffix.

    Returns:
        The simulator output text.  For ``simtype == 'verilator'`` a final
        line starting with ``'-'`` is removed.
    """

    # create target hardware
    a = ng.placeholder(a_dtype, shape=a_shape, name='a')
    b = ng.placeholder(b_dtype, shape=b_shape, name='b')

    if bias_shape is not None:
        bias = ng.placeholder(bias_dtype, bias_shape, name='bias')
    else:
        bias = None

    if scale_shape is not None:
        scale = ng.placeholder(scale_dtype, scale_shape, name='scale')
    else:
        scale = None

    transposed_a = False
    transposed_b = True

    c = ng.matmul(a, b, bias, scale, transposed_a, transposed_b, rshift_mul,
                  rshift_sum, rshift_out, act_func, c_dtype, ng.int32,
                  ng.int32, 'matmul', par_left_col, par_left_row, par_out_col,
                  concur_out_col, stationary, left_ram_size, right_ram_size,
                  bias_ram_size, scale_ram_size, None, None, None,
                  out_ram_size)

    targ = ng.to_veriloggen([c],
                            'matrix_matmul',
                            silent=silent,
                            config={'maxi_datawidth': axi_datawidth})

    # verification data
    va = np.arange(a.length, dtype=np.int64).reshape(a.shape) % [5]
    vb = np.arange(b.length, dtype=np.int64).reshape(b.shape) % [5] - [3]

    if bias is not None:
        vbias = np.arange(bias.length, dtype=np.int64).reshape(
            bias.shape) % [4]
    else:
        vbias = None

    if scale is not None:
        vscale = np.arange(scale.length, dtype=np.int64).reshape(
            scale.shape) % [6]
    else:
        vscale = None

    # The software reference must be computed from the same numeric data that
    # is written to the memory image below (vbias / vscale), not from the
    # graph placeholders, and with the same transposition flags as the
    # hardware operator.
    vc = ng.verify.matmul(va, vb, vbias, vscale, transposed_a, transposed_b,
                          rshift_mul, rshift_sum, rshift_out, act_func,
                          c_dtype, ng.int32, ng.int32, 'matmul',
                          par_left_col, par_left_row, par_out_col,
                          concur_out_col, stationary,
                          left_ram_size, right_ram_size, bias_ram_size,
                          scale_ram_size, None, None, None, out_ram_size,
                          False, a_dtype, b_dtype, bias_dtype, scale_dtype)

    # to memory image
    # Largest tensor footprint, rounded up to a multiple of 4096.
    largest_size = max(a.memory_size,
                       b.memory_size,
                       bias.memory_size if bias is not None else 0,
                       scale.memory_size if scale is not None else 0,
                       c.memory_size)
    size_max = int(math.ceil(largest_size / 4096)) * 4096

    # The reference output is placed one size_max past the highest tensor
    # address, and the address handed to set_global_addrs follows it.
    check_addr = max(a.addr,
                     b.addr,
                     bias.addr if bias is not None else -1,
                     scale.addr if scale is not None else -1,
                     c.addr) + size_max
    size_check = size_max
    tmp_addr = check_addr + size_check

    memimg_datawidth = 32
    mem = np.zeros([1024 * 1024 * 8 // memimg_datawidth], dtype=np.int64)
    # Fill with a non-zero value (100) before the tensors are written.
    mem = mem + [100]

    axi.set_memory(
        mem, va, memimg_datawidth, a_dtype.width, a.addr,
        max(int(math.ceil(axi_datawidth / a_dtype.width)), par_left_col))

    axi.set_memory(
        mem, vb, memimg_datawidth, b_dtype.width, b.addr,
        max(int(math.ceil(axi_datawidth / b_dtype.width)), par_left_col))

    if bias is not None:
        axi.set_memory(
            mem, vbias, memimg_datawidth, bias_dtype.width, bias.addr,
            max(int(math.ceil(axi_datawidth / bias_dtype.width)), par_out_col))

    if scale is not None:
        axi.set_memory(
            mem, vscale, memimg_datawidth, scale_dtype.width, scale.addr,
            max(int(math.ceil(axi_datawidth / scale_dtype.width)),
                par_out_col))

    axi.set_memory(
        mem, vc, memimg_datawidth, c_dtype.width, check_addr,
        max(int(math.ceil(axi_datawidth / c_dtype.width)), par_out_col))

    # test controller
    m = Module('test')
    params = m.copy_params(targ)
    ports = m.copy_sim_ports(targ)
    clk = ports['CLK']
    resetn = ports['RESETN']
    # The generated design exposes RESETN; the helpers below take RST.
    rst = m.Wire('RST')
    rst.assign(Not(resetn))

    # AXI memory model
    if outputfile is None:
        outputfile = os.path.splitext(os.path.basename(__file__))[0] + '.out'

    memimg_name = 'memimg_' + outputfile

    memory = axi.AxiMemoryModel(m,
                                'memory',
                                clk,
                                rst,
                                datawidth=axi_datawidth,
                                memimg=mem,
                                memimg_name=memimg_name,
                                memimg_datawidth=memimg_datawidth)
    memory.connect(ports, 'maxi')

    # AXI-Slave controller
    _saxi = vthread.AXIMLite(m, '_saxi', clk, rst, noio=True)
    _saxi.connect(ports, 'saxi')

    # timer
    time_counter = m.Reg('time_counter', 32, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(time_counter.inc())

    # NOTE(review): ctrl is handed to vthread.Thread below, which presumably
    # compiles its source; keep the body to plain statements and '#' comments.
    def ctrl():
        # presumably a start-up delay before touching the slave interface
        for i in range(100):
            pass

        ng.sim.set_global_addrs(_saxi, tmp_addr)

        start_time = time_counter.value
        ng.sim.start(_saxi)

        print('# start')

        ng.sim.wait(_saxi)
        end_time = time_counter.value

        print('# end')
        print('# execution cycles: %d' % (end_time - start_time))

        # verify
        ok = True
        for i in range(c.shape[0]):
            for j in range(c.shape[1]):
                orig = memory.read_word(i * c.aligned_shape[1] + j, c.addr,
                                        c_dtype.width)
                check = memory.read_word(i * c.aligned_shape[1] + j,
                                         check_addr, c_dtype.width)

                if vthread.verilog.NotEql(orig, check):
                    print(i, j, orig, check)
                    ok = False

        if ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

        vthread.finish()

    th = vthread.Thread(m, 'th_ctrl', clk, rst, ctrl)
    fsm = th.start()

    uut = m.Instance(targ,
                     'uut',
                     params=m.connect_params(targ),
                     ports=m.connect_ports(targ))

    # simulation.setup_waveform(m, uut)
    simulation.setup_clock(m, clk, hperiod=5)
    init = simulation.setup_reset(m,
                                  resetn,
                                  m.make_reset(),
                                  period=100,
                                  polarity='low')

    # Hard stop in case the control thread never reaches vthread.finish().
    init.add(
        Delay(1000000),
        Systask('finish'),
    )

    # output source code
    if filename is not None:
        m.to_verilog(filename)

    # run simulation
    sim = simulation.Simulator(m, sim=simtype)
    rslt = sim.run(outputfile=outputfile)
    lines = rslt.splitlines()
    # Drop a final line starting with '-' from Verilator output; the extra
    # `lines` check avoids an IndexError when the simulator printed nothing.
    if simtype == 'verilator' and lines and lines[-1].startswith('-'):
        rslt = '\n'.join(lines[:-1])
    return rslt

Example #3

Show file

def mkTest(ich=3, och=10, ch=64, ksize=3, stride=1, col=28, row=28):
    """Build (but do not run) the simulation testbench for a small int32 CNN.

    The graph is: conv2d -> max_pool_serial -> relu -> conv2d -> relu ->
    reshape([-1]) -> matmul -> relu -> matmul.  It is converted with
    ``ng.to_veriloggen`` and wrapped in a ``test`` module that contains an
    AXI memory model, an AXI-lite slave controller, a cycle counter and a
    control thread.

    Args:
        ich: Last dimension of the input placeholder and of weight ``w0``.
        och: First dimension of the final weight ``w3``.
        ch: First dimension of both convolution weights.
        ksize: Second and third dimension of both convolution weights.
        stride: Value of the two middle entries of the conv2d ``strides``.
        col: Third dimension of the input placeholder shape.
        row: Second dimension of the input placeholder shape.

    Returns:
        The veriloggen ``Module`` named ``'test'``.
    """
    # create target hardware

    # layer 0: conv2d, max_pool_serial, relu
    input_layer = ng.placeholder(ng.int32,
                                 shape=(1, row, col, ich),
                                 name='input_layer')
    w0 = ng.variable(ng.int32, shape=(ch, ksize, ksize, ich), name='w0')
    a0 = ng.conv2d(input_layer, w0, strides=(1, stride, stride, 1))
    a0 = ng.max_pool_serial(a0, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1))
    a0 = ng.relu(a0)

    # layer 1: conv2d, relu, reshape
    w1 = ng.variable(ng.int32,
                     shape=(ch, ksize, ksize, a0.shape[-1]),
                     name='w1')
    a1 = ng.conv2d(a0, w1, strides=(1, stride, stride, 1))
    a1 = ng.relu(a1)
    # flatten to one dimension so it can feed the matmul layers
    a1 = ng.reshape(a1, [-1])

    # layer 2: full-connection
    # weights are stored as (outputs, inputs), hence transposed_b=True
    w2 = ng.variable(ng.int32, shape=(16, a1.shape[-1]), name='w2')
    a2 = ng.matmul(a1, w2, transposed_b=True)
    a2 = ng.relu(a2)

    # layer 3: full-connection
    w3 = ng.variable(ng.int32, shape=(och, a2.shape[-1]), name='w3')
    output_layer = ng.matmul(a2, w3, transposed_b=True, name='output_layer')

    targ = ng.to_veriloggen([output_layer], 'cnn')
    # alternative generator: targ = ng.to_ipxact([output_layer], 'cnn')

    # test controller
    m = Module('test')
    params = m.copy_params(targ)
    ports = m.copy_sim_ports(targ)
    clk = ports['CLK']
    resetn = ports['RESETN']
    # the generated design exposes RESETN; the helpers below take RST
    rst = m.Wire('RST')
    rst.assign(Not(resetn))

    # AXI memory model
    memory = axi.AxiMemoryModel(m, 'memory', clk, rst, mem_addrwidth=23)
    memory.connect(ports, 'maxi')

    # AXI-Slave controller
    _saxi = vthread.AXIMLite(m, '_saxi', clk, rst, noio=True)
    _saxi.connect(ports, 'saxi')

    # timer
    time_counter = m.Reg('time_counter', 32, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(time_counter.inc())

    # NOTE(review): ctrl is handed to vthread.Thread below, which presumably
    # compiles its source; keep the body to plain statements and '#' comments.
    def ctrl():
        # presumably a start-up delay before touching the slave interface
        for i in range(100):
            pass

        start_time = time_counter.value
        ng.sim.start(_saxi)

        print('# start')

        ng.sim.wait(_saxi)
        end_time = time_counter.value

        print('# end')
        print('# execution cycles: %d' % (end_time - start_time))

        vthread.finish()

    th = vthread.Thread(m, 'th_ctrl', clk, rst, ctrl)
    fsm = th.start()

    uut = m.Instance(targ,
                     'uut',
                     params=m.connect_params(targ),
                     ports=m.connect_ports(targ))

    # simulation.setup_waveform(m, uut)
    simulation.setup_clock(m, clk, hperiod=5)
    init = simulation.setup_reset(m,
                                  resetn,
                                  m.make_reset(),
                                  period=100,
                                  polarity='low')

    # hard stop in case the control thread never reaches vthread.finish()
    init.add(
        Delay(10000000),
        Systask('finish'),
    )

    return m

Example #4

Show file

def run(
    act_dtype=ng.int8,
    weight_dtype=ng.int8,
    bias_dtype=ng.int32,
    scale_dtype=ng.int8,
    par_ich=2,
    par_och=2,
    chunk_size=64,
    axi_datawidth=32,
    silent=False,
    weight_filename='cnn.npy',
    verilog_filename=None,
    sim_filename=None,
    # simtype='iverilog',
    simtype='verilator',
    # simtype=None,  # no RTL simulation
):
    """Define, quantize, evaluate, export and simulate a small CNN.

    The steps are numbered (1)-(7) in the body: build the dataflow, assign
    random weights and quantize them, set hardware attributes, evaluate the
    model in software, generate IP-XACT, save the exported parameters, and
    simulate the generated hardware against the software result.

    Args:
        act_dtype: NNgen dtype of the input placeholder and layer outputs.
        weight_dtype, bias_dtype, scale_dtype: NNgen dtypes of the weight,
            bias and scale variables.
        par_ich, par_och: Values given to ``attribute(par_ich=, par_och=)``
            on the conv2d/matmul operators; ``par_och`` is also used as
            ``par`` of the max-pool operator.
        chunk_size: Passed to ``ng.export_ndarray`` and used as the
            alignment of the memory-image addresses.
        axi_datawidth: Used as ``maxi_datawidth`` in the generator config and
            as the data width of the AXI memory model.
        silent: Forwarded to ``ng.to_ipxact``.
        weight_filename: Path given to ``np.save`` for the exported
            parameters.
        verilog_filename: If not ``None``, the testbench is written there as
            Verilog.
        sim_filename: Passed to ``sim.run`` as ``outputfile``; defaults to
            this file's base name with an ``.out`` suffix.
        simtype: Simulator name passed to ``simulation.Simulator``.  With
            ``None`` the function calls ``sys.exit()`` after step (6).

    Returns:
        The simulator output text.  For ``simtype == 'verilator'`` a final
        line starting with ``'-'`` is removed.
    """

    # --------------------
    # (1) Represent a DNN model as a dataflow by NNgen operators
    # --------------------

    # input
    input_layer = ng.placeholder(
        dtype=act_dtype,
        shape=(1, 32, 32, 3),  # N, H, W, C
        name='input_layer')

    # layer 0: conv2d (with bias and scale (= batchnorm)), relu, max_pool
    w0 = ng.variable(
        dtype=weight_dtype,
        shape=(64, 3, 3, 3),  # Och, Ky, Kx, Ich
        name='w0')
    b0 = ng.variable(dtype=bias_dtype, shape=(w0.shape[0], ), name='b0')
    s0 = ng.variable(dtype=scale_dtype, shape=(w0.shape[0], ), name='s0')

    a0 = ng.conv2d(input_layer,
                   w0,
                   strides=(1, 1, 1, 1),
                   bias=b0,
                   scale=s0,
                   act_func=ng.relu,
                   dtype=act_dtype,
                   sum_dtype=ng.int32)

    a0p = ng.max_pool_serial(a0, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1))

    # layer 1: conv2d, relu, reshape
    w1 = ng.variable(weight_dtype, shape=(64, 3, 3, a0.shape[-1]), name='w1')
    b1 = ng.variable(bias_dtype, shape=(w1.shape[0], ), name='b1')
    s1 = ng.variable(scale_dtype, shape=(w1.shape[0], ), name='s1')

    a1 = ng.conv2d(a0p,
                   w1,
                   strides=(1, 1, 1, 1),
                   bias=b1,
                   scale=s1,
                   act_func=ng.relu,
                   dtype=act_dtype,
                   sum_dtype=ng.int32)

    a1r = ng.reshape(a1, [1, -1])

    # layer 2: full-connection, relu
    w2 = ng.variable(weight_dtype, shape=(256, a1r.shape[-1]), name='w2')
    b2 = ng.variable(bias_dtype, shape=(w2.shape[0], ), name='b2')
    s2 = ng.variable(scale_dtype, shape=(w2.shape[0], ), name='s2')

    a2 = ng.matmul(a1r,
                   w2,
                   bias=b2,
                   scale=s2,
                   transposed_b=True,
                   act_func=ng.relu,
                   dtype=act_dtype,
                   sum_dtype=ng.int32)

    # layer 3: full-connection, relu
    w3 = ng.variable(weight_dtype, shape=(10, a2.shape[-1]), name='w3')
    b3 = ng.variable(bias_dtype, shape=(w3.shape[0], ), name='b3')
    s3 = ng.variable(scale_dtype, shape=(w3.shape[0], ), name='s3')

    # output
    output_layer = ng.matmul(a2,
                             w3,
                             bias=b3,
                             scale=s3,
                             transposed_b=True,
                             name='output_layer',
                             dtype=act_dtype,
                             sum_dtype=ng.int32)

    # --------------------
    # (2) Assign weights to the NNgen operators
    # --------------------

    # In this example, random floating-point values are assigned.
    # In a real case, you should assign actual weight values
    # obtained by training on a DNN framework.

    # If you don't use NNgen's quantizer, you can assign integer weights to
    # each tensor.

    # Weights and biases are clipped normal samples and scales are all ones.
    # The iteration order (w0, b0, s0, w1, ...) keeps the sequence of random
    # draws the same as assigning each tensor one by one.
    layer_variables = ((w0, b0, s0), (w1, b1, s1), (w2, b2, s2), (w3, b3, s3))
    for weight_var, bias_var, scale_var in layer_variables:
        for tensor in (weight_var, bias_var):
            value = np.random.normal(size=tensor.length).reshape(tensor.shape)
            value = np.clip(value, -3.0, 3.0)
            tensor.set_value(value)
        scale_var.set_value(np.ones(scale_var.shape))

    # Quantizing the floating-point weights by the NNgen quantizer.
    # Alternatively, you can assign integer weights by yourself to each tensor.

    imagenet_mean = np.array([0.485, 0.456, 0.406]).astype(np.float32)
    imagenet_std = np.array([0.229, 0.224, 0.225]).astype(np.float32)

    if act_dtype.width > 8:
        act_scale_factor = 128
    else:
        act_scale_factor = int(round(2**(act_dtype.width - 1) * 0.5))

    input_scale_factors = {'input_layer': act_scale_factor}
    input_means = {'input_layer': imagenet_mean * act_scale_factor}
    input_stds = {'input_layer': imagenet_std * act_scale_factor}

    ng.quantize([output_layer], input_scale_factors, input_means, input_stds)

    # --------------------
    # (3) Assign hardware attributes
    # --------------------

    # conv2d, matmul
    # par_ich: parallelism in input-channel
    # par_och: parallelism in output-channel
    # par_col: parallelism in pixel column
    # par_row: parallelism in pixel row

    a0.attribute(par_ich=par_ich, par_och=par_och)
    a1.attribute(par_ich=par_ich, par_och=par_och)
    a2.attribute(par_ich=par_ich, par_och=par_och)
    output_layer.attribute(par_ich=par_ich, par_och=par_och)

    # cshamt_out: right shift amount after applying bias/scale
    # If you assign integer weights by yourself to each tensor,
    # cshamt (constant shift amount) must be assigned to each operator.

    # a0.attribute(cshamt_out=weight_dtype.width + 1)
    # a1.attribute(cshamt_out=weight_dtype.width + 1)
    # a2.attribute(cshamt_out=weight_dtype.width + 1)
    # output_layer.attribute(cshamt_out=weight_dtype.width + 1)

    # max_pool
    # par: parallelism in in/out channel

    par = par_och

    a0p.attribute(par=par)

    # --------------------
    # (4) Verify the DNN model behavior by executing the NNgen dataflow as a software
    # --------------------

    # In this example, random integer values are assigned.
    # In a real case, you should assign actual integer activation values,
    # such as an image.

    input_layer_value = np.random.normal(size=input_layer.length).reshape(
        input_layer.shape)
    input_layer_value = input_layer_value * imagenet_std + imagenet_mean
    input_layer_value = np.clip(input_layer_value, -5.0, 5.0)
    input_layer_value = input_layer_value * act_scale_factor
    # Clip to the signed range of act_dtype: [-2**(w-1), 2**(w-1) - 1].
    # Bounds one step wider on each side (e.g. -129..128 for 8 bits) would
    # let through values that do not fit in act_dtype.width bits.
    act_min = -1 * 2**(act_dtype.width - 1)
    act_max = 2**(act_dtype.width - 1) - 1
    input_layer_value = np.clip(input_layer_value, act_min, act_max)
    input_layer_value = np.round(input_layer_value).astype(np.int64)

    eval_outs = ng.eval([output_layer], input_layer=input_layer_value)
    output_layer_value = eval_outs[0]

    # print(output_layer_value)
    # breakpoint()

    # --------------------
    # (5) Convert the NNgen dataflow to a hardware description (Verilog HDL and IP-XACT)
    # --------------------

    # to Veriloggen object
    # targ = ng.to_veriloggen([output_layer], 'cnn', silent=silent,
    #                        config={'maxi_datawidth': axi_datawidth})

    # to IP-XACT (the method returns Veriloggen object, as well as to_veriloggen)
    targ = ng.to_ipxact([output_layer],
                        'cnn',
                        silent=silent,
                        config={'maxi_datawidth': axi_datawidth})

    # to Verilog HDL RTL (the method returns a source code text)
    # rtl = ng.to_verilog([output_layer], 'cnn', silent=silent,
    #                    config={'maxi_datawidth': axi_datawidth})

    # --------------------
    # (6) Save the quantized weights
    # --------------------

    # convert weight values to a memory image:
    # on a real FPGA platform, this image will be used as a part of the model definition.

    # Use the caller's chunk_size (default 64) instead of overwriting it with
    # a local constant, so export and address alignment honour the parameter.
    param_data = ng.export_ndarray([output_layer], chunk_size)
    np.save(weight_filename, param_data)

    # --------------------
    # (7) Simulate the generated hardware by Veriloggen and Verilog simulator
    # --------------------

    if simtype is None:
        sys.exit()

    param_bytes = len(param_data)

    # Memory layout, each region aligned up to chunk_size:
    # input | parameters | reference output | address for set_global_addrs
    variable_addr = int(
        math.ceil((input_layer.addr + input_layer.memory_size) /
                  chunk_size)) * chunk_size
    check_addr = int(math.ceil(
        (variable_addr + param_bytes) / chunk_size)) * chunk_size
    tmp_addr = int(
        math.ceil(
            (check_addr + output_layer.memory_size) / chunk_size)) * chunk_size

    memimg_datawidth = 32
    mem = np.zeros([1024 * 1024 * 256 // (memimg_datawidth // 8)],
                   dtype=np.int64)
    # Fill with a non-zero value (100) before the tensors are written.
    mem = mem + [100]

    # placeholder
    axi.set_memory(
        mem, input_layer_value, memimg_datawidth, act_dtype.width,
        input_layer.addr,
        max(int(math.ceil(axi_datawidth / act_dtype.width)), par_ich))

    # parameters (variable and constant)
    axi.set_memory(mem, param_data, memimg_datawidth, 8, variable_addr)

    # verification data
    axi.set_memory(
        mem, output_layer_value, memimg_datawidth, act_dtype.width, check_addr,
        max(int(math.ceil(axi_datawidth / act_dtype.width)), par_och))

    # test controller
    m = Module('test')
    params = m.copy_params(targ)
    ports = m.copy_sim_ports(targ)
    clk = ports['CLK']
    resetn = ports['RESETN']
    # The generated design exposes RESETN; the helpers below take RST.
    rst = m.Wire('RST')
    rst.assign(Not(resetn))

    # AXI memory model
    if sim_filename is None:
        sim_filename = os.path.splitext(os.path.basename(__file__))[0] + '.out'

    memimg_name = 'memimg_' + sim_filename

    memory = axi.AxiMemoryModel(m,
                                'memory',
                                clk,
                                rst,
                                datawidth=axi_datawidth,
                                memimg=mem,
                                memimg_name=memimg_name,
                                memimg_datawidth=memimg_datawidth)
    memory.connect(ports, 'maxi')

    # AXI-Slave controller
    _saxi = vthread.AXIMLite(m, '_saxi', clk, rst, noio=True)
    _saxi.connect(ports, 'saxi')

    # timer
    time_counter = m.Reg('time_counter', 32, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(time_counter.inc())

    # NOTE(review): ctrl is handed to vthread.Thread below, which presumably
    # compiles its source; keep the body to plain statements and '#' comments.
    def ctrl():
        # presumably a start-up delay before touching the slave interface
        for i in range(100):
            pass

        ng.sim.set_global_addrs(_saxi, tmp_addr)

        start_time = time_counter.value
        ng.sim.start(_saxi)

        print('# start')

        ng.sim.wait(_saxi)
        end_time = time_counter.value

        print('# end')
        print('# execution cycles: %d' % (end_time - start_time))

        # verify
        ok = True
        for bat in range(output_layer.shape[0]):
            for x in range(output_layer.shape[1]):
                orig = memory.read_word(
                    bat * output_layer.aligned_shape[1] + x, output_layer.addr,
                    act_dtype.width)
                check = memory.read_word(
                    bat * output_layer.aligned_shape[1] + x, check_addr,
                    act_dtype.width)

                if vthread.verilog.NotEql(orig, check):
                    print('NG (', bat, x, ') orig: ', orig, ' check: ', check)
                    ok = False
                else:
                    print('OK (', bat, x, ') orig: ', orig, ' check: ', check)

        if ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

        vthread.finish()

    th = vthread.Thread(m, 'th_ctrl', clk, rst, ctrl)
    fsm = th.start()

    uut = m.Instance(targ,
                     'uut',
                     params=m.connect_params(targ),
                     ports=m.connect_ports(targ))

    # simulation.setup_waveform(m, uut)
    simulation.setup_clock(m, clk, hperiod=5)
    init = simulation.setup_reset(m,
                                  resetn,
                                  m.make_reset(),
                                  period=100,
                                  polarity='low')

    # Hard stop in case the control thread never reaches vthread.finish().
    init.add(
        Delay(10000000),
        Systask('finish'),
    )

    # output source code
    if verilog_filename is not None:
        m.to_verilog(verilog_filename)

    # run simulation
    sim = simulation.Simulator(m, sim=simtype)
    rslt = sim.run(outputfile=sim_filename)
    lines = rslt.splitlines()
    # Drop a final line starting with '-' from Verilator output; the extra
    # `lines` check avoids an IndexError when the simulator printed nothing.
    if simtype == 'verilator' and lines and lines[-1].startswith('-'):
        rslt = '\n'.join(lines[:-1])
    return rslt

Example #5

Show file

def mkTest(n_input=784, n_classes=10):
    """Build (but do not run) the simulation testbench for a 3-layer MLP.

    The graph is three ``ng.matmul`` layers with int32 operands, with
    ``ng.relu`` after the first two.  It is converted with
    ``ng.to_veriloggen`` and wrapped in a ``test`` module that contains an
    AXI memory model, an AXI-lite slave controller, a cycle counter and a
    control thread.

    Args:
        n_input: Length of the input placeholder and both dimensions of the
            two hidden weights.
        n_classes: First dimension of the output weight.

    Returns:
        The veriloggen ``Module`` named ``'test'``.
    """
    # create target hardware
    x = ng.placeholder(ng.int32, shape=[n_input])

    # weights are stored as (outputs, inputs), hence transposed_b=True below
    w1 = ng.variable(ng.int32, shape=(n_input, n_input), name='h1')
    w2 = ng.variable(ng.int32, shape=(n_input, n_input), name='h2')
    w3 = ng.variable(ng.int32, shape=(n_classes, n_input), name='out')

    l1 = ng.matmul(x, w1, transposed_b=True)
    l1 = ng.relu(l1)

    l2 = ng.matmul(l1, w2, transposed_b=True)
    l2 = ng.relu(l2)

    out = ng.matmul(l2, w3, transposed_b=True)

    targ = ng.to_veriloggen([out], 'mlp')
    # alternative generator: targ = ng.to_ipxact([out], 'mlp')

    # test controller
    m = Module('test')
    params = m.copy_params(targ)
    ports = m.copy_sim_ports(targ)
    clk = ports['CLK']
    resetn = ports['RESETN']
    # the generated design exposes RESETN; the helpers below take RST
    rst = m.Wire('RST')
    rst.assign(Not(resetn))

    # AXI memory model
    memory = axi.AxiMemoryModel(m, 'memory', clk, rst)
    memory.connect(ports, 'maxi')

    # AXI-Slave controller
    _saxi = vthread.AXIMLite(m, '_saxi', clk, rst, noio=True)
    _saxi.connect(ports, 'saxi')

    # timer
    time_counter = m.Reg('time_counter', 32, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(time_counter.inc())

    # NOTE(review): ctrl is handed to vthread.Thread below, which presumably
    # compiles its source; keep the body to plain statements and '#' comments.
    def ctrl():
        # presumably a start-up delay before touching the slave interface
        for i in range(100):
            pass

        start_time = time_counter.value
        ng.sim.start(_saxi)

        print('# start')

        ng.sim.wait(_saxi)
        end_time = time_counter.value

        print('# end')
        print('# execution cycles: %d' % (end_time - start_time))

        vthread.finish()

    th = vthread.Thread(m, 'th_ctrl', clk, rst, ctrl)
    fsm = th.start()

    uut = m.Instance(targ,
                     'uut',
                     params=m.connect_params(targ),
                     ports=m.connect_ports(targ))

    # simulation.setup_waveform(m, uut)
    simulation.setup_clock(m, clk, hperiod=5)
    init = simulation.setup_reset(m,
                                  resetn,
                                  m.make_reset(),
                                  period=100,
                                  polarity='low')

    # hard stop in case the control thread never reaches vthread.finish()
    init.add(
        Delay(1000000),
        Systask('finish'),
    )

    return m