def comp(size):
    # Runs comp_stream four times (twice at offset 2*size, twice at offset 0),
    # then comp_sequential once at offset `size`, and hands both result
    # regions to check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream pass 1: offset = 2 * size
    offset = size + size
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 0, size)
    comp_stream(size, offset)
    myaxi.dma_write(ram_c, offset, 1024, size)

    # stream pass 2: exact repeat of pass 1 (same offset, same transfers)
    offset = size + size
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 0, size)
    comp_stream(size, offset)
    myaxi.dma_write(ram_c, offset, 1024, size)

    # stream pass 3: offset = 0
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 0, size)
    comp_stream(size, offset)
    myaxi.dma_write(ram_c, offset, 1024, size)

    # stream pass 4: exact repeat of pass 3
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 0, size)
    comp_stream(size, offset)
    myaxi.dma_write(ram_c, offset, 1024, size)

    # sequential pass: offset = size, result written to 1024 * 2
    offset = size
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 0, size)
    comp_sequential(size, offset)
    myaxi.dma_write(ram_c, offset, 1024 * 2, size)

    # verification: check() receives offset 0 (last stream pass) and
    # offset `size` (sequential pass)
    check(size, 0, offset)

    vthread.finish()
def comp(size):
    # Runs two stream passes with `size` doubled, restores the original size,
    # then runs one stream pass and one sequential pass and calls check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # Remember the caller's size; `size` is temporarily doubled below.
    orig_size = size
    size = size + size

    # stream pass with doubled size, offset = 2 * (doubled size)
    offset = size + size
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 0, size)
    comp_stream(size, offset)
    myaxi.dma_write(ram_c, offset, 1024, size)

    # stream pass with doubled size, offset = 3 * (doubled size)
    offset = size + size + size
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 0, size)
    comp_stream(size, offset)
    myaxi.dma_write(ram_c, offset, 1024, size)

    # Back to the original size for the passes that get verified.
    size = orig_size

    # stream pass: offset = 0
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 0, size)
    comp_stream(size, offset)
    myaxi.dma_write(ram_c, offset, 1024, size)

    # sequential pass: offset = size, result written to 1024 * 2
    offset = size
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 0, size)
    comp_sequential(size, offset)
    myaxi.dma_write(ram_c, offset, 1024 * 2, size)

    # verification: offset 0 (stream) and offset `size` (sequential)
    check(size, 0, offset)

    vthread.finish()
def blink():
    # Drives `led` from `a` ten times, exchanging `a` and `b` after each
    # update via embedded code strings.

    # Initial values: a <= 0, b <= 1.
    vthread.embedded_code('a <= 0;', 'b <= 1;')

    for _ in range(10):
        led.value = a
        # Both strings are passed in one call; presumably emitted as Verilog
        # non-blocking assignments so that a and b swap -- confirm.
        vthread.embedded_code('a <= b;', 'b <= a;')

    vthread.finish()
def matmul(matrix_size, a_offset, b_offset, c_offset):
    # Times one call to comp() using the outer `timer` value, prints the
    # elapsed count, then calls check() with the same arguments.

    # Sample `timer` immediately before and after the computation.
    start_time = timer
    comp(matrix_size, a_offset, b_offset, c_offset)
    end_time = timer

    time = end_time - start_time
    print("Time (cycles): %d" % time)

    check(matrix_size, a_offset, b_offset, c_offset)

    vthread.finish()
def blink(size):
    # Runs body() once at offset 1024 * 16 and reports the verdict held in
    # the outer `all_ok` signal.

    # Start from "passed"; presumably body() clears all_ok on a mismatch --
    # confirm in body().
    all_ok.value = True

    offset = 1024 * 16
    body(size, offset)

    if all_ok:
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def ctrl():
    # Configures the design through _saxi, starts it, polls for completion,
    # prints timing from `counter`, then verifies the result matrix in memory.

    # Empty 100-iteration loop; presumably a start-up delay -- confirm.
    for i in range(100):
        pass

    # Write the run parameters to _saxi addresses 4, 8, 12 and 16.
    awaddr = 4
    print('# matrix_size = %d' % matrix_size)
    _saxi.write(awaddr, matrix_size)

    awaddr = 8
    print('# a_offset = %d' % a_offset)
    _saxi.write(awaddr, a_offset)

    awaddr = 12
    print('# b_offset = %d' % b_offset)
    _saxi.write(awaddr, b_offset)

    awaddr = 16
    print('# c_offset = %d' % c_offset)
    _saxi.write(awaddr, c_offset)

    # Writing 1 to address 0 is done right after sampling the start time;
    # presumably this is the start trigger -- confirm.
    awaddr = 0
    start_time = counter
    print('# start time = %d' % start_time)
    _saxi.write(awaddr, 1)

    # Poll address 20 until it reads non-zero.
    araddr = 20
    v = _saxi.read(araddr)
    while v == 0:
        v = _saxi.read(araddr)

    end_time = counter
    print('# end time = %d' % end_time)
    time = end_time - start_time
    print('# exec time = %d' % time)

    # Verification: diagonal entries must equal (y + 1) * 2, all others 0.
    all_ok = True
    for y in range(matrix_size):
        for x in range(matrix_size):
            # Address of element (y, x), scaled by datawidth // 8.
            v = memory.read(
                c_offset + (y * matrix_size + x) * datawidth // 8)
            if y == x and vthread.verilog.NotEql(v, (y + 1) * 2):
                all_ok = False
                print("NG [%d,%d] = %d" % (y, x, v))
            if y != x and vthread.verilog.NotEql(v, 0):
                all_ok = False
                print("NG [%d,%d] = %d" % (y, x, v))

    if all_ok:
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def blink(size): all_ok.value = True # Test for 4KB boundary check offset = myaxi.boundary_size - 4 body(size, offset) if all_ok: print('# verify: PASSED') else: print('# verify: FAILED') vthread.finish()
def comp(size):
    # Runs comp_stream at offset 0 and comp_sequential at offset `size` on
    # data read into ram_a, writes one entry of ram_b per pass, then calls
    # check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream pass
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, size)
    comp_stream(size, offset)
    # Length argument is 1: only a single entry is written back.
    myaxi.dma_write(ram_b, offset, 1024, 1)

    # sequential pass
    offset = size
    myaxi.dma_read(ram_a, offset, 0, size)
    comp_sequential(size, offset)
    myaxi.dma_write(ram_b, offset, 1024 * 2, 1)

    # verification over one entry: offset 0 and offset `size`
    check(1, 0, offset)

    vthread.finish()
def comp():
    # Runs comp_stream at offset 0 and comp_sequential at offset `size`
    # (`size` comes from the enclosing scope), then calls check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream pass: result written to 1024 * 4
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, size)
    comp_stream(offset)
    myaxi.dma_write(ram_b, offset, 1024 * 4, size)

    # sequential pass: result written to 1024 * 8
    offset = size
    myaxi.dma_read(ram_a, offset, 0, size)
    comp_sequential(offset)
    myaxi.dma_write(ram_b, offset, 1024 * 8, size)

    # verification: offset 0 (stream) and offset `size` (sequential)
    check(0, offset)

    vthread.finish()
def blink(size):
    # Runs body() once at an offset of 1024 * 16 plus (boundary_size - 4),
    # printing start/end markers, and reports the verdict in `all_ok`.

    # Start from "passed"; presumably body() clears all_ok on a mismatch --
    # confirm in body().
    all_ok.value = True

    print('# start')

    # Test for 4KB boundary check: base 1024 * 16, then 4 below a boundary.
    offset = 1024 * 16 + (myaxi.boundary_size - 4)
    body(size, offset)

    print('# end')

    if all_ok:
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def comp():
    # Repeatedly waits on saxi flag 0, then reads `size` items from axi_a,
    # adds 1 to each, and writes them to axi_b with the same `last` marker.

    while True:
        # Block until register 0 reads 1; resetvalue=0 is passed so the flag
        # is presumably cleared on return -- confirm against the saxi API.
        saxi.wait_flag(0, value=1, resetvalue=0)
        saxi.write(1, 1)  # set busy
        size = saxi.read(2)

        for i in range(size):
            a, a_last = axi_a.read()
            b = a + 1
            # Forward the input's `last` value unchanged.
            b_last = a_last
            axi_b.write(b, b_last)

        saxi.write(1, 0)  # unset busy

    # NOTE(review): placed after the loop, where it is unreachable because
    # the loop never exits -- confirm the intended position.
    vthread.finish()
def blink(size):
    # Runs body() four times, each at a different 1024 * 16 base plus
    # (boundary_size - 4), and reports the verdict held in `all_ok`.

    # Start from "passed"; presumably body() clears all_ok on a mismatch --
    # confirm in body().
    all_ok.value = True

    for i in range(4):
        print('# iter %d start' % i)
        # Test for 4KB boundary check: per-iteration base, 4 below a boundary.
        offset = i * 1024 * 16 + (myaxi.boundary_size - 4)
        body(size, offset)
        print('# iter %d end' % i)

    if all_ok:
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def blink(size):
    # Runs body() once at offset 1024 * 16, printing start/end markers, and
    # reports the verdict held in the outer `all_ok` signal.

    # Start from "passed"; presumably body() clears all_ok on a mismatch --
    # confirm in body().
    all_ok.value = True

    print('# start')

    # The 4KB-boundary offset is disabled here; the plain base is used.
    # Disabled variant:
    # offset = 1024 * 16 + (myaxi.boundary_size - 4)
    offset = 1024 * 16
    body(size, offset)

    print('# end')

    if all_ok:
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def blink(size):
    # Runs body() four times, each at a different 1024 * 16 base plus
    # (boundary_size - 4), and reports the verdict held in `all_ok`.

    # Start from "passed"; presumably body() clears all_ok on a mismatch --
    # confirm in body().
    all_ok.value = True

    for i in range(4):
        print('# iter %d start' % i)
        # Test for 4KB boundary check: per-iteration base, 4 below a boundary.
        offset = i * 1024 * 16 + (myaxi.boundary_size - 4)
        body(size, offset)
        print('# iter %d end' % i)

    if all_ok:
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def comp(size):
    # Runs comp_stream at offset 0 and comp_sequential at offset `size` on
    # data read into ram_a / ram_b, writes one entry of ram_c per pass, then
    # calls check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream pass: ram_a from 0, ram_b from 512
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 512, size)
    comp_stream(size, offset)
    # Length argument is 1: only a single entry is written back.
    myaxi.dma_write(ram_c, offset, 1024, 1)

    # sequential pass: same sources, result written to 1024 * 2
    offset = size
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 512, size)
    comp_sequential(size, offset)
    myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

    # verification: offset 0 (stream) and offset `size` (sequential)
    check(size, 0, offset)

    vthread.finish()
def ctrl():
    # Starts the design through ng.sim, waits for it to finish, and prints
    # the difference of time_counter sampled before and after.

    # Empty 100-iteration loop; presumably a start-up delay -- confirm.
    for i in range(100):
        pass

    start_time = time_counter.value
    ng.sim.start(_saxi)

    print('# start')

    ng.sim.wait(_saxi)
    end_time = time_counter.value

    print('# end')
    print('# execution cycles: %d' % (end_time - start_time))

    vthread.finish()
def comp(size):
    # Runs comp_stream twice at offset 0 and comp_sequential once at offset
    # `size`, then passes the two returned values to check().
    # NOTE(review): dma_read arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream
    offset = 0
    myaxi.dma_read(ram_a, offset, 1024, size)
    # comp_stream is called twice in a row; only the second return value is
    # kept in st_i.
    st_i = comp_stream(size, offset)
    st_i = comp_stream(size, offset)

    # sequential
    offset = size
    myaxi.dma_read(ram_a, offset, 1024, size)
    sq_i = comp_sequential(size, offset)

    # verification
    check(st_i, sq_i)

    vthread.finish()
def ctrl():
    # Sets the global addresses, runs the design through ng.sim, prints the
    # elapsed time_counter value, then compares every element of `out`
    # against the reference data at check_addr.

    # Empty 100-iteration loop; presumably a start-up delay -- confirm.
    for i in range(100):
        pass

    ng.sim.set_global_addrs(_saxi, tmp_addr)

    start_time = time_counter.value
    ng.sim.start(_saxi)

    print('# start')

    ng.sim.wait(_saxi)
    end_time = time_counter.value

    print('# end')
    print('# execution cycles: %d' % (end_time - start_time))

    # verify
    ok = True
    for bat in range(out.shape[0]):
        for y in range(out.shape[1]):
            for x in range(out.shape[2]):
                for ch in range(out.shape[3]):
                    # Loop bounds use out.shape, but the word index is built
                    # from out.aligned_shape; the same index is read from
                    # both out.addr and check_addr.
                    orig = memory.read_word(
                        bat * out.aligned_shape[1] * out.aligned_shape[2] * out.aligned_shape[3] +
                        y * out.aligned_shape[2] * out.aligned_shape[3] +
                        x * out.aligned_shape[3] + ch,
                        out.addr, out_dtype.width)
                    check = memory.read_word(
                        bat * out.aligned_shape[1] * out.aligned_shape[2] * out.aligned_shape[3] +
                        y * out.aligned_shape[2] * out.aligned_shape[3] +
                        x * out.aligned_shape[3] + ch,
                        check_addr, out_dtype.width)

                    if vthread.verilog.NotEql(orig, check):
                        print('NG (', bat, y, x, ch,
                              ') orig: ', orig, ' check: ', check)
                        ok = False
                    # Per-element success logging is disabled:
                    # else:
                    #     print('OK (', bat, y, x, ch,
                    #           ') orig: ', orig, ' check: ', check)

    if ok:
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def comp(size):
    # Transfers 3 * size entries per RAM, runs comp_stream at offset 0 and
    # comp_sequential at offset 3 * size, then calls check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    new_size = size + size + size

    # stream pass: comp_stream receives `size`, while the DMA transfers use
    # new_size.
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, new_size)
    myaxi.dma_read(ram_b, offset, 512, new_size)
    comp_stream(size, offset)
    myaxi.dma_write(ram_c, offset, 1024, new_size)

    # sequential pass: comp_sequential receives new_size
    offset = new_size
    myaxi.dma_read(ram_a, offset, 0, new_size)
    myaxi.dma_read(ram_b, offset, 512, new_size)
    comp_sequential(new_size, offset)
    myaxi.dma_write(ram_c, offset, 1024 * 2, new_size)

    # verification: offset 0 (stream) and offset new_size (sequential)
    check(new_size, 0, offset)

    vthread.finish()
def comp(size):
    # Transfers 3 * size entries per RAM, runs comp_stream at offset 0 and
    # comp_sequential at offset 3 * size, then calls check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    new_size = size + size + size

    # stream pass: comp_stream receives `size`, while the DMA transfers use
    # new_size.
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, new_size)
    myaxi.dma_read(ram_b, offset, 512, new_size)
    comp_stream(size, offset)
    myaxi.dma_write(ram_c, offset, 1024, new_size)

    # sequential pass: comp_sequential receives new_size
    offset = new_size
    myaxi.dma_read(ram_a, offset, 0, new_size)
    myaxi.dma_read(ram_b, offset, 512, new_size)
    comp_sequential(new_size, offset)
    myaxi.dma_write(ram_c, offset, 1024 * 2, new_size)

    # verification: offset 0 (stream) and offset new_size (sequential)
    check(new_size, 0, offset)

    vthread.finish()
def test():
    # Sends ten values (100..109) through uart_tx, reads each reply from
    # uart_rx and expects it to equal the sent value plus `sw`. Prints one
    # OK/NG line per value and a final PASSED/FAILED verdict.

    all_ok = True

    for i in range(10):
        s = 100 + i
        uart_tx.send(s)
        r = uart_rx.recv()

        if r == s + sw:
            print('OK: %d + %d == %d' % (s, sw, r))
        else:
            print('NG: %d + %d != %d' % (s, sw, r))
            # Record the mismatch; without this the final verdict below
            # would report PASSED even after an NG line.
            all_ok = False

    if all_ok:
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def comp(size):
    # Reads 3 * size entries into ram_a per pass, runs comp_stream at offset
    # 0 and comp_sequential at offset 4 * size, writes `size` entries of
    # ram_b per pass, then calls check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, size * 3)
    comp_stream(size, offset)
    myaxi.dma_write(ram_b, offset, 1024, size)

    # sequential
    offset = size * 4
    myaxi.dma_read(ram_a, offset, 0, size * 3)
    comp_sequential(size, offset)
    myaxi.dma_write(ram_b, offset, 1024 * 2, size)

    # verification
    check(size, 0, offset)

    vthread.finish()
def test():
    # Sends ten values (100..109) through uart_tx, reads each reply from
    # uart_rx and expects it to equal the sent value plus `sw`. Prints one
    # OK/NG line per value and a final PASSED/FAILED verdict.

    all_ok = True

    for i in range(10):
        s = 100 + i
        uart_tx.send(s)
        r = uart_rx.recv()

        if r == s + sw:
            print('OK: %d + %d == %d' % (s, sw, r))
        else:
            print('NG: %d + %d != %d' % (s, sw, r))
            # Record the mismatch; without this the final verdict below
            # would report PASSED even after an NG line.
            all_ok = False

    if all_ok:
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def comp(size):
    # Same stream-versus-sequential comparison as a plain run, but every
    # dma_read passes local_stride=2 and the stride is forwarded to both
    # compute functions.
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream pass
    offset = 0
    stride = 2
    myaxi.dma_read(ram_a, offset, 0, size, local_stride=stride)
    myaxi.dma_read(ram_b, offset, 0, size, local_stride=stride)
    comp_stream(size, offset, stride)
    # Length argument is 1: only a single entry is written back.
    myaxi.dma_write(ram_c, offset, 1024, 1)

    # sequential pass
    offset = size
    myaxi.dma_read(ram_a, offset, 0, size, local_stride=stride)
    myaxi.dma_read(ram_b, offset, 0, size, local_stride=stride)
    comp_sequential(size, offset, stride)
    myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

    # verification: offset 0 (stream) and offset `size` (sequential)
    check(size, 0, offset)

    vthread.finish()
def comp(size): all_ok.value = True # addsub # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_stream_addsub(size, offset) myaxi.dma_write(ram_c, offset, 512, size) myaxi.dma_write(ram_c, offset, 1024, size) # sequential offset = size myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_sequential_addsub(size, offset) myaxi.dma_write(ram_c, offset, 1024 + 512, size) myaxi.dma_write(ram_c, offset, 1024 + 1024, size) # verification print('# addsub') check(size, 0, offset) # main # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_stream_main(size, offset) myaxi.dma_write(ram_c, offset, 512, size) myaxi.dma_write(ram_c, offset, 1024, size) # sequential offset = size myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_sequential_main(size, offset) myaxi.dma_write(ram_c, offset, 1024 + 512, size) myaxi.dma_write(ram_c, offset, 1024 + 1024, size) # verification print('# main') check(size, 0, offset) vthread.finish()
def blink(): vthread.set_parallel() # parallel execution like non-blocking subst a = 0 b = 1 vthread.unset_parallel() for _ in range(10): vthread.set_parallel() # parallel execution like non-blocking subst led.value = a # work as 'swap' a = b b = a vthread.unset_parallel() vthread.finish()
def comp(size): all_ok.value = True # addsub # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_stream_addsub(size, offset) myaxi.dma_write(ram_c, offset, 512, size) myaxi.dma_write(ram_c, offset, 1024, size) # sequential offset = size myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_sequential_addsub(size, offset) myaxi.dma_write(ram_c, offset, 1024 + 512, size) myaxi.dma_write(ram_c, offset, 1024 + 1024, size) # verification print('# addsub') check(size, 0, offset) # main # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_stream_main(size, offset) myaxi.dma_write(ram_c, offset, 512, size) myaxi.dma_write(ram_c, offset, 1024, size) # sequential offset = size myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_sequential_main(size, offset) myaxi.dma_write(ram_c, offset, 1024 + 512, size) myaxi.dma_write(ram_c, offset, 1024 + 1024, size) # verification print('# main') check(size, 0, offset) vthread.finish()
def ctrl():
    # Writes a 4x4 width/height through maxi, writes 1 to address 0, then
    # polls address 4 until it reads non-zero.

    width, height = [4, 4]

    # Addresses are written as <register index> * 4.
    awaddr = 2 * 4
    maxi.write(awaddr, width)

    awaddr = 3 * 4
    maxi.write(awaddr, height)

    # Presumably the start trigger -- confirm against the register map.
    awaddr = 0 * 4
    maxi.write(awaddr, 1)

    # Poll register 1 until it becomes non-zero.
    araddr = 1 * 4
    v = maxi.read(araddr)
    while v == 0:
        v = maxi.read(araddr)

    vthread.finish()
def comp(size): all_ok.value = True # mul # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 512, size) comp_stream_mul(size, offset) myaxi.dma_write(ram_c, offset, 1024, size) # sequential offset = size myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 512, size) comp_sequential_mul(size, offset) myaxi.dma_write(ram_c, offset, 1024 * 2, size) # verification print('# MUL') myaxi.dma_read(ram_c, 0, 1024, size) myaxi.dma_read(ram_c, offset, 1024 * 2, size) check(size, 0, offset) # wrap # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 512, size) comp_stream_wrap(size, offset) myaxi.dma_write(ram_c, offset, 1024, size) # sequential offset = size myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 512, size) comp_sequential_wrap(size, offset) myaxi.dma_write(ram_c, offset, 1024 * 2, size) # verification print('# WRAP') myaxi.dma_read(ram_c, 0, 1024, size) myaxi.dma_read(ram_c, offset, 1024 * 2, size) check(size, 0, offset) vthread.finish()
def blink(): vthread.set_parallel() # parallel execution like non-blocking subst a = 0 b = 1 vthread.unset_parallel() for _ in range(10): vthread.set_parallel() # parallel execution like non-blocking subst led.value = a # work as 'swap' a = b b = a vthread.unset_parallel() vthread.finish()
def comp():
    # Runs comp_stream and comp_sequential on data loaded at offsets[0]
    # (`offsets` and `dma_size` come from the enclosing scope), writes one
    # entry of ram_c per pass, then calls check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream
    myaxi.dma_read(ram_a, offsets[0], 0, dma_size)
    myaxi.dma_read(ram_b, offsets[0], 0, dma_size)
    comp_stream()
    myaxi.dma_write(ram_c, offsets[0], 1024 * 4, 1)

    # sequential
    # Both passes load and store at offsets[0]; only comp_sequential and
    # check() receive `bias`.
    bias = dma_size
    myaxi.dma_read(ram_a, offsets[0], 0, dma_size)
    myaxi.dma_read(ram_b, offsets[0], 0, dma_size)
    comp_sequential(bias)
    myaxi.dma_write(ram_c, offsets[0], 1024 * 8, 1)

    # verification
    check(bias, 0, bias)

    vthread.finish()
def comp(size):
    # Stream-versus-sequential comparison in which two ram_a entries are
    # overwritten with fixed values (-100 and 200) after each DMA read.
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream pass
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, size)
    # Overwrite entries 3 and 7 of the freshly loaded region.
    ram_a.write(offset + 3, -100)
    ram_a.write(offset + 7, 200)
    comp_stream(size, offset)
    # Length argument is 1: only a single entry is written back.
    myaxi.dma_write(ram_b, offset, 1024, 1)

    # sequential pass: same overrides applied at the new offset
    offset = size
    myaxi.dma_read(ram_a, offset, 0, size)
    ram_a.write(offset + 3, -100)
    ram_a.write(offset + 7, 200)
    comp_sequential(size, offset)
    myaxi.dma_write(ram_b, offset, 1024 * 2, 1)

    # verification over one entry: offset 0 and offset `size`
    check(1, 0, offset)

    vthread.finish()
def comp():
    # Runs comp_stream and comp_sequential on data loaded at offsets[0]
    # (`offsets` and `dma_size` come from the enclosing scope), writes one
    # entry of ram_c per pass, then calls check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream
    myaxi.dma_read(ram_a, offsets[0], 0, dma_size)
    myaxi.dma_read(ram_b, offsets[0], 0, dma_size)
    comp_stream()
    myaxi.dma_write(ram_c, offsets[0], 1024 * 4, 1)

    # sequential
    # Both passes load and store at offsets[0]; only comp_sequential and
    # check() receive `bias`.
    bias = dma_size
    myaxi.dma_read(ram_a, offsets[0], 0, dma_size)
    myaxi.dma_read(ram_b, offsets[0], 0, dma_size)
    comp_sequential(bias)
    myaxi.dma_write(ram_c, offsets[0], 1024 * 8, 1)

    # verification
    check(bias, 0, bias)

    vthread.finish()
def comp(size): all_ok.value = True # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_stream_macstrm(size, offset) myaxi.dma_write(ram_c, offset, 1024, size) # sequential offset = size myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_sequential_macstrm(size, offset) myaxi.dma_write(ram_c, offset, 1024 * 2, size) # verification print('# macstream') myaxi.dma_read(ram_c, 0, 1024, size) myaxi.dma_read(ram_c, offset, 1024 * 2, size) check(size, 0, offset) # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_stream_mystrm(size, offset) myaxi.dma_write(ram_c, offset, 1024, size // reduce_size) # sequential offset = size myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_sequential_mystrm(size, offset) myaxi.dma_write(ram_c, offset, 1024 * 2, size // reduce_size) # verification print('# mystream') myaxi.dma_read(ram_c, 0, 1024, size // reduce_size) myaxi.dma_read(ram_c, offset, 1024 * 2, size // reduce_size) check(size // reduce_size, 0, offset) vthread.finish()
def comp():
    # Runs comp_stream at offset 0 and comp_sequential at offset `size`
    # (`size` comes from the enclosing scope), writes one entry of ram_c per
    # pass, then calls check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 0, size)
    comp_stream(offset)
    myaxi.dma_write(ram_c, offset, 1024 * 4, 1)

    # sequential
    offset = size
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 0, size)
    comp_sequential(offset)
    myaxi.dma_write(ram_c, offset, 1024 * 8, 1)

    # verification
    check(size, 0, offset)

    vthread.finish()
def comp(size): numbins = 8 # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) comp_stream(numbins, size, offset) myaxi.dma_write(ram_b, offset, 1024, numbins) # sequential offset = size * 4 myaxi.dma_read(ram_a, offset, 0, size * 2) comp_sequential(numbins, size, offset) myaxi.dma_write(ram_b, offset, 1024 * 2, numbins) # verification check(numbins, 0, offset) vthread.finish()
def comp(size):
    # Runs comp_stream at offset 0 and comp_sequential at offset `size`;
    # each returns a count that is used as the dma_write length.
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 512, size)
    cnt = comp_stream(size, offset)
    myaxi.dma_write(ram_c, offset, 1024, cnt)

    # sequential
    offset = size
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 512, size)
    # `cnt` is overwritten here; check() below sees the sequential count.
    cnt = comp_sequential(size, offset)
    myaxi.dma_write(ram_c, offset, 1024 * 2, cnt)

    # verification
    check(cnt, 0, offset)

    vthread.finish()
def comp():
    # Runs comp_stream at offset 0 and comp_sequential at offset `size`
    # (`size` comes from the enclosing scope), reads both written results
    # back into ram_b, then calls check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, size)
    comp_stream(offset)
    myaxi.dma_write(ram_b, offset, 1024 * 4, size)

    # sequential
    offset = size
    myaxi.dma_read(ram_a, offset, 0, size)
    comp_sequential(offset)
    myaxi.dma_write(ram_b, offset, 1024 * 8, size)

    # verification
    # Read both result regions back from 1024 * 4 and 1024 * 8 into ram_b.
    myaxi.dma_read(ram_b, 0, 1024 * 4, size)
    myaxi.dma_read(ram_b, offset, 1024 * 8, size)
    check(0, offset)

    vthread.finish()
def comp():
    # Runs comp_stream on data at local address 0 and comp_sequential on
    # data at local address dma_size (`dma_size` comes from the enclosing
    # scope), reads both results back into ram_c, then calls check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream
    myaxi.dma_read(ram_a, 0, 0, dma_size)
    myaxi.dma_read(ram_b, 0, 0, dma_size)
    comp_stream()
    myaxi.dma_write(ram_c, 0, 1024 * 8, dma_size)

    # sequential
    myaxi.dma_read(ram_a, dma_size, 0, dma_size)
    myaxi.dma_read(ram_b, dma_size, 0, dma_size)
    comp_sequential(dma_size)
    myaxi.dma_write(ram_c, dma_size, 1024 * 12, dma_size)

    # verification
    # Read both result regions back from 1024 * 8 and 1024 * 12 into ram_c.
    myaxi.dma_read(ram_c, 0, 1024 * 8, dma_size)
    myaxi.dma_read(ram_c, dma_size, 1024 * 12, dma_size)
    check(0, dma_size)

    vthread.finish()
def blink(times):
    # Enqueues 0 .. times-1 into myfifo, dequeues the same number of items
    # into a fixed-point accumulator, and checks the integer part of the sum
    # against (times - 1) * times // 2.

    for i in range(times):
        wdata = i
        myfifo.enq(wdata)
        print('wdata = %d' % wdata)

    # Accumulator created with FixedConst(0, 8); the second argument is
    # presumably the fractional point -- confirm against vthread.fixed.
    sum = vthread.fixed.FixedConst(0, 8)

    for i in range(times):
        rdata = myfifo.deq()
        sum += rdata
        print('rdata = %d (%f)' % (rdata.int_part, rdata))

    print('sum = %d (%f)' % (sum.int_part, sum))

    # Expected value is the arithmetic series 0 + 1 + ... + (times - 1).
    if vthread.verilog.Eql(sum.int_part, (times - 1) * times // 2):
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def ctrl():
    # Writes 1 to _saxi address 0, polls address 4 until it is non-zero,
    # then prints the verdict based on the value read from address 8.

    # Empty 100-iteration loop; presumably a start-up delay -- confirm.
    for i in range(100):
        pass

    # Presumably the start trigger -- confirm against the register map.
    awaddr = 0
    _saxi.write(awaddr, 1)

    # Poll address 4 until it becomes non-zero.
    araddr = 4
    v = _saxi.read(araddr)
    while v == 0:
        v = _saxi.read(araddr)

    # A non-zero value at address 8 is reported as PASSED.
    araddr = 8
    v = _saxi.read(araddr)
    if v:
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def comp():
    # Repeatedly waits on saxi flag 0, fills ram_a / ram_b from axi_a /
    # axi_b, runs comp_stream, and drains ram_c through axi_c.

    while True:
        # Block until register 0 reads 1; resetvalue=0 is passed so the flag
        # is presumably cleared on return -- confirm against the saxi API.
        saxi.wait_flag(0, value=1, resetvalue=0)
        saxi.write(1, 1)  # set busy
        size = saxi.read(2)
        offset = 0

        # Both transfers are issued before either is waited on.
        axi_a.write_ram_async(ram_a, offset, size, port=1)  # non-blocking read
        axi_b.write_ram_async(ram_b, offset, size, port=1)  # non-blocking read
        axi_a.wait_write_ram()  # wait
        axi_b.wait_write_ram()  # wait

        comp_stream(size, offset)

        # NOTE(review): the existing comment says "non-blocking", but the
        # call is read_ram rather than an *_async variant -- confirm.
        axi_c.read_ram(ram_c, offset, size, port=1)  # non-blocking write
        axi_c.wait_read_ram()  # wait

        saxi.write(1, 0)  # unset busy

    # NOTE(review): placed after the loop, where it is unreachable because
    # the loop never exits -- confirm the intended position.
    vthread.finish()
def blink(times):
    # Enqueues 0 .. times-1 into myfifo, dequeues the same number of items
    # while summing them, and checks the sum against
    # (times - 1) * times // 2.

    for i in range(times):
        wdata = i
        myfifo.enq(wdata)
        print('wdata = %d' % wdata)

    sum = 0

    for i in range(times):
        rdata = myfifo.deq()
        sum += rdata
        print('rdata = %d' % rdata)

    print('sum = %d' % sum)

    # Expected value is the arithmetic series 0 + 1 + ... + (times - 1).
    if vthread.verilog.Eql(sum, (times - 1) * times // 2):
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def comp(size):
    # Fills ram_a with a descending sequence, then runs comp_stream at
    # offset 0 and comp_sequential at offset 4 * size on data read into
    # ram_ext, and calls check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # ram_a[i] = size - 1 - i, i.e. size-1 down to 0.
    for i in range(size):
        ram_a.write(i, size - i - 1)

    # stream
    offset = 0
    myaxi.dma_read(ram_ext, offset, 0, size)
    comp_stream(size, offset)
    myaxi.dma_write(ram_b, offset, 1024, size)

    # sequential
    offset = size * 4
    myaxi.dma_read(ram_ext, offset, 0, size)
    comp_sequential(size, offset)
    myaxi.dma_write(ram_b, offset, 1024 * 2, size)

    # verification
    check(size, 0, offset)

    vthread.finish()
def ctrl():
    # Writes 1 to _saxi address 0, polls address 4 until it is non-zero,
    # then prints the verdict based on the value read from address 8.

    # Empty 100-iteration loop; presumably a start-up delay -- confirm.
    for i in range(100):
        pass

    # Presumably the start trigger -- confirm against the register map.
    awaddr = 0
    _saxi.write(awaddr, 1)

    # Poll address 4 until it becomes non-zero.
    araddr = 4
    v = _saxi.read(araddr)
    while v == 0:
        v = _saxi.read(araddr)

    # A non-zero value at address 8 is reported as PASSED.
    araddr = 8
    v = _saxi.read(araddr)
    if v:
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def ctrl():
    # Sets the global offset and addresses, runs the design through ng.sim,
    # prints the elapsed time_counter value, then compares `c` against the
    # reference data at check_addr for num_rep rows.

    # Empty 100-iteration loop; presumably a start-up delay -- confirm.
    for i in range(100):
        pass

    ng.sim.set_global_offset(_saxi, global_addr_offset)
    ng.sim.set_global_addrs(_saxi, tmp_addr)

    start_time = time_counter.value
    ng.sim.start(_saxi)

    print('# start')

    ng.sim.wait(_saxi)
    end_time = time_counter.value

    print('# end')
    print('# execution cycles: %d' % (end_time - start_time))

    # verify
    ok = True
    for i in range(num_rep):
        for j in range(c.shape[-1]):
            # The column bound uses c.shape[-1], but the row stride is
            # c.aligned_shape[-1]; both reads add global_addr_offset to the
            # base address.
            orig = memory.read_word(i * c.aligned_shape[-1] + j,
                                    c.addr + global_addr_offset, c_dtype.width)
            check = memory.read_word(i * c.aligned_shape[-1] + j,
                                     check_addr + global_addr_offset, c_dtype.width)

            if vthread.verilog.NotEql(orig, check):
                print('NG', i, j, orig, check)
                ok = False
            # Per-element success logging is disabled:
            # else:
            #     print('OK', i, j, orig, check)

    if ok:
        print('# verify: PASSED')
    else:
        print('# verify: FAILED')

    vthread.finish()
def comp(size):
    # Runs comp_stream at offset 0 and comp_sequential at offset `size`;
    # each return value plus one is used as the transfer length. Results
    # are read back into ram_c before check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 512, size)
    # +1 presumably converts a last index into a count -- confirm in
    # comp_stream.
    st_i = comp_stream(size, offset) + 1
    myaxi.dma_write(ram_c, offset, 1024, st_i)

    # sequential
    offset = size
    myaxi.dma_read(ram_a, offset, 0, size)
    myaxi.dma_read(ram_b, offset, 512, size)
    sq_i = comp_sequential(size, offset) + 1
    myaxi.dma_write(ram_c, offset, 1024 * 2, sq_i)

    # verification
    # Read both result regions back from 1024 and 1024 * 2 into ram_c.
    myaxi.dma_read(ram_c, 0, 1024, st_i)
    myaxi.dma_read(ram_c, offset, 1024 * 2, sq_i)
    check(st_i, sq_i, 0, offset)

    vthread.finish()
def comp(size):
    # Loads 2 * size entries, runs comp_stream twice (one call per half)
    # and comp_sequential once over the full length, then calls check().
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    # stream
    double_size = size + size
    offset = 0
    myaxi.dma_read(ram_a, offset, 0, double_size)
    myaxi.dma_read(ram_b, offset, 512, double_size)
    # Two back-to-back calls of `size` each: first half, then second half.
    comp_stream(size, offset, 100)
    comp_stream(size, offset + size, 100)
    myaxi.dma_write(ram_c, offset, 1024, double_size)

    # sequential
    offset = double_size
    myaxi.dma_read(ram_a, offset, 0, double_size)
    myaxi.dma_read(ram_b, offset, 512, double_size)
    comp_sequential(double_size, offset, 100)
    myaxi.dma_write(ram_c, offset, 1024 * 2, double_size)

    # verification
    check(double_size, 0, offset)

    vthread.finish()
def comp(size):
    # Stream-versus-sequential comparison that keeps separate DMA and
    # compute offsets; the compute-side length is size * numbanks * 2
    # (`numbanks` comes from the enclosing scope).
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    dma_size = size
    comp_size = size * numbanks * 2

    # stream pass
    dma_offset = 0
    comp_offset = 0
    myaxi.dma_read(ram_a, dma_offset, 0, dma_size)
    myaxi.dma_read(ram_b, dma_offset, 0, dma_size)
    comp_stream(size, comp_offset)
    myaxi.dma_write(ram_c, dma_offset, 1024, dma_size)

    # sequential pass: DMA side advances by `size`, compute side by
    # comp_size
    dma_offset = size
    comp_offset = comp_size
    myaxi.dma_read(ram_a, dma_offset, 0, dma_size)
    myaxi.dma_read(ram_b, dma_offset, 0, dma_size)
    comp_sequential(size, comp_offset)
    myaxi.dma_write(ram_c, dma_offset, 1024 * 2, dma_size)

    # verification uses the compute-side length and offsets
    check(comp_size, 0, comp_offset)

    vthread.finish()
def comp(size):
    # Stream-versus-sequential comparison that keeps separate DMA and
    # compute offsets; the compute-side length is size * numbanks
    # (`numbanks` comes from the enclosing scope).
    # NOTE(review): dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- confirm with the myaxi API.

    dma_size = size
    comp_size = size * numbanks

    # stream pass
    dma_offset = 0
    comp_offset = 0
    myaxi.dma_read(ram_a, dma_offset, 0, dma_size)
    myaxi.dma_read(ram_b, dma_offset, 0, dma_size)
    comp_stream(size, comp_offset)
    myaxi.dma_write(ram_c, dma_offset, 1024, dma_size)

    # sequential pass: DMA side advances by `size`, compute side by
    # comp_size
    dma_offset = size
    comp_offset = comp_size
    myaxi.dma_read(ram_a, dma_offset, 0, dma_size)
    myaxi.dma_read(ram_b, dma_offset, 0, dma_size)
    comp_sequential(size, comp_offset)
    myaxi.dma_write(ram_c, dma_offset, 1024 * 2, dma_size)

    # verification uses the compute-side length and offsets
    check(comp_size, 0, comp_offset)

    vthread.finish()
def comp(size): all_ok.value = True # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_stream_macstrm(size, offset) myaxi.dma_write(ram_c, offset, 1024, size) # sequential offset = size myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_sequential_macstrm(size, offset) myaxi.dma_write(ram_c, offset, 1024 * 2, size) # verification print('# macstream') check(size, 0, offset) # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_stream_mystrm(size, offset) myaxi.dma_write(ram_c, offset, 1024, size // reduce_size) # sequential offset = size myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 0, size) comp_sequential_mystrm(size, offset) myaxi.dma_write(ram_c, offset, 1024 * 2, size // reduce_size) # verification print('# mystream') check(size // reduce_size, 0, offset) vthread.finish()
def comp(size): all_ok.value = True # mul # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 512, size) comp_stream_mul(size, offset) myaxi.dma_write(ram_c, offset, 1024, size) # sequential offset = size myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 512, size) comp_sequential_mul(size, offset) myaxi.dma_write(ram_c, offset, 1024 * 2, size) # verification print('# MUL') check(size, 0, offset) # mac # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 512, size) comp_stream_mac(size, offset) myaxi.dma_write(ram_c, offset, 1024, 1) # sequential offset = size myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 512, size) comp_sequential_mac(size, offset) myaxi.dma_write(ram_c, offset, 1024 * 2, 1) # verification print('# MAC') check(1, 0, offset) # act # stream offset = 0 myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 512, size) comp_stream_act(size, offset) myaxi.dma_write(ram_c, offset, 1024, 1) # sequential offset = size myaxi.dma_read(ram_a, offset, 0, size) myaxi.dma_read(ram_b, offset, 512, size) comp_sequential_act(size, offset) myaxi.dma_write(ram_c, offset, 1024 * 2, 1) # verification print('# ACT') check(1, 0, offset) vthread.finish()