def mkMain():
    """Build the 'main' module: AXI read -> running sum -> BRAM -> readback.

    The module issues one AXI read request (address 1024, length 64),
    feeds the returned stream through ``DataflowManager.Iadd``, stores the
    result stream into a single-port BRAM, then reads the BRAM back and
    accumulates the words into the 'sum' register.  When the last word has
    been seen, ``sum`` and a value precomputed in Python are displayed.

    Returns:
        The constructed ``Module`` named 'main'.
    """
    mod = Module('main')
    clock = mod.Input('CLK')
    reset = mod.Input('RST')

    # AXI master used for reading only.
    axi_master = axi.AxiMaster(mod, 'myaxi', clock, reset)
    axi_master.disable_write()

    ram = bram.Bram(mod, 'mybram', clock, reset, numports=1)

    flow = dataflow.DataflowManager(mod, clock, reset)
    ctrl = FSM(mod, 'fsm', clock, reset)

    # Stage 1: AXI read request; the FSM advances once it is acknowledged.
    burst_addr = 1024
    burst_len = 64
    req_ack, _req_counter = axi_master.read_request(
        burst_addr, burst_len, cond=ctrl)
    ctrl.If(req_ack).goto_next()

    # Stage 2: AXI -> dataflow.  The integrating adder is reset by the
    # 'last' flag taken through prev(1).
    stream_data, stream_last, _stream_done = axi_master.read_dataflow()
    running = flow.Iadd(stream_data, reset=stream_last.prev(1))

    # Stage 3: dataflow -> BRAM (port 0, starting at address 0).
    store_port = 0
    store_addr = 0
    store_len = burst_len
    store_done = ram.write_dataflow(
        store_port, store_addr, running, store_len, cond=ctrl)
    ctrl.goto_next()
    ctrl.If(store_done).goto_next()

    # Stage 4 (verification): BRAM -> dataflow over the same range.
    load_port = 0
    load_addr = 0
    load_len = burst_len
    load_data, load_last, load_done = ram.read_dataflow(
        load_port, load_addr, load_len, cond=ctrl)
    ctrl.goto_next()
    ctrl.If(load_done).goto_next()

    word, word_valid = load_data.read()
    last_flag, _last_valid = load_last.read()

    total = mod.Reg('sum', 32, initval=0)

    # Reference value computed at generation time.
    expected_total = 0
    for idx in range(burst_len):
        expected_total = (
            expected_total + (burst_addr + burst_addr + idx) * (idx + 1) // 2)

    checker = Seq(mod, 'seq', clock, reset)
    checker.If(word_valid)(total.add(word))
    checker.Then().If(last_flag == 1).Delay(1)(
        Systask('display', 'sum=%d expected_sum=%d', total, expected_total))

    return mod
def mkMain(n=128, datawidth=32, numports=2):
    """Build the 'main' module: counter stream -> BRAM, then BRAM -> sum check.

    A dataflow counter (minus one) is written to 64 BRAM words through
    port 0; 32 words are then read back through port 1 and accumulated in
    the 'sum' register, which is displayed next to a precomputed value.

    Args:
        n: Sizing parameter; the BRAM address width is
            ``int(math.log(n, 2)) * 2``.
        datawidth: BRAM data width.
        numports: Not used by this function; the BRAM is always created
            with 2 ports.

    Returns:
        The constructed ``Module`` named 'main'.
    """
    mod = Module('main')
    clock = mod.Input('CLK')
    reset = mod.Input('RST')

    addr_bits = int(math.log(n, 2)) * 2
    ram = bram.Bram(mod, 'mybram', clock, reset, datawidth, addr_bits, 2)
    ram.disable_write(1)  # port 1 is used for reading only below

    flow = dataflow.DataflowManager(mod, clock, reset)
    ctrl = FSM(mod, 'fsm', clock, reset)

    # Source stream: counter value minus one.
    counter = flow.Counter()
    stream = counter - 1

    # Dataflow -> BRAM through port 0.
    store_port = 0
    store_addr = 0
    store_len = 64
    store_done = ram.write_dataflow(
        store_port, store_addr, stream, store_len, cond=ctrl)
    ctrl.goto_next()
    ctrl.If(store_done).goto_next()
    ctrl.goto_next()

    # BRAM -> dataflow through port 1.
    load_port = 1
    load_addr = 0
    load_len = 32
    load_data, load_last, load_done = ram.read_dataflow(
        load_port, load_addr, load_len, cond=ctrl)
    ctrl.goto_next()
    ctrl.If(load_done).goto_next()

    # Verification: accumulate every valid word that comes back.
    word, word_valid = load_data.read()
    last_flag, _last_valid = load_last.read()

    total = mod.Reg('sum', 32, initval=0)
    # Arithmetic-series sum over load_addr .. load_addr + load_len - 1.
    expected_total = (load_addr + load_addr + load_len - 1) * load_len // 2

    checker = Seq(mod, 'seq', clock, reset)
    checker.If(word_valid)(total.add(word))
    checker.Then().If(last_flag == 1).Delay(1)(
        Systask('display', 'sum=%d expected_sum=%d', total, expected_total))

    return mod
def mkMain(n=128, datawidth=32, numports=2):
    """Build the 'main' module: an FSM writes 16 BRAM words, then sums them.

    The FSM first clears its registers, then writes ``count`` to
    consecutive addresses for 16 steps, then reads the same addresses back
    and adds each valid read value into the 'sum' register, displaying the
    register after each addition.

    Args:
        n: Sizing parameter; the BRAM address width is
            ``int(math.log(n, 2)) * 2``.
        datawidth: BRAM data width.
        numports: Not used by this function; the BRAM is always created
            with 1 port.

    Returns:
        The constructed ``Module`` named 'main'.
    """
    mod = Module('main')
    clock = mod.Input('CLK')
    reset = mod.Input('RST')

    addr_bits = int(math.log(n, 2)) * 2
    ram = bram.Bram(mod, 'mybram', clock, reset, datawidth, addr_bits, 1)

    # Working registers for the BRAM access example.
    counter = mod.Reg('count', 32, initval=0)
    acc = mod.Reg('sum', 32, initval=0)
    address = mod.Reg('addr', 32, initval=0)

    ctrl = FSM(mod, 'fsm', clock, reset)

    def _accumulate_and_display():
        # Add the read value when it is valid, then display the register
        # one cycle later under the same condition.
        ctrl.If(read_valid)(acc(acc + read_data))
        ctrl.Then().Delay(1)(Systask('display', "sum=%d", acc))

    # Initial state: clear everything.
    ctrl(address(0), counter(0), acc(0))
    ctrl.goto_next()

    steps = 16

    # Write state: store the counter value, advance, wrap after the last step.
    ram.write(0, address, counter, cond=ctrl)
    ctrl(address.inc(), counter.inc())
    ctrl.If(counter == steps - 1)(address(0), counter(0))
    ctrl.Then().goto_next()

    # Read state: request consecutive addresses and accumulate valid data.
    read_data, read_valid = ram.read(0, address, cond=ctrl)
    ctrl(address.inc(), counter.inc())
    _accumulate_and_display()
    ctrl.If(counter == steps - 1)(address(0), counter(0))
    ctrl.Then().goto_next()

    # Following state: keep accumulating read data that is still arriving.
    _accumulate_and_display()

    ctrl.make_always()

    return mod
def mkMain(n=128, datawidth=32, numports=2):
    """Build the 'main' module: counter stream -> BRAM, checked on the write port.

    A dataflow counter (maxval 64) minus one is written to 64 BRAM words
    through port 0.  A separate sequencer watches port 0's write-enable,
    accumulates the written data in the 'sum' register and displays it
    together with a precomputed value once the port address equals the
    last written address.

    Args:
        n: Sizing parameter; the BRAM address width is
            ``int(math.log(n, 2)) * 2``.
        datawidth: BRAM data width.
        numports: Not used by this function; the BRAM is always created
            with 2 ports.

    Returns:
        The constructed ``Module`` named 'main'.
    """
    mod = Module('main')
    clock = mod.Input('CLK')
    reset = mod.Input('RST')

    addr_bits = int(math.log(n, 2)) * 2
    ram = bram.Bram(mod, 'mybram', clock, reset, datawidth, addr_bits, 2)
    ram.disable_write(1)

    flow = dataflow.DataflowManager(mod, clock, reset)
    ctrl = FSM(mod, 'fsm', clock, reset)
    ctrl.goto_next()

    # Source stream: bounded counter minus one.
    counter = flow.Counter(maxval=64)
    stream = counter - 1

    # Dataflow -> BRAM through port 0.
    store_port = 0
    store_addr = 0
    store_len = 64
    store_done = ram.write_dataflow(
        store_port, store_addr, stream, store_len, cond=ctrl)
    ctrl.goto_next()
    ctrl.If(store_done).goto_next()

    # Verification: observe what is actually written on port 0.
    total = mod.Reg('sum', 32, initval=0)
    expected_total = (
        (store_addr + store_addr + store_len - 1) * store_len // 2 - store_len)

    checker = Seq(mod, 'seq', clock, reset)
    checker.If(ram[0].wenable)(
        total.add(ram[0].wdata)
    )
    checker.Then().If(ram[0].addr == store_len - 1).Delay(2)(
        Systask('display', 'sum=%d expected_sum=%d', total, expected_total)
    )

    return mod
def mkMain(n=128, datawidth=32, numports=2):
    """Build the 'main' module: a Seq writes 16 BRAM words and sums them back.

    Port 0 is written with an incrementing count while the write address
    is below 16.  Port 1 is read with its own address register, gated by a
    condition built from ``seq.Prev(1, delay=4, initval=0)`` and the read
    address bound; each valid read value is added to the 'sum' register,
    which is displayed one cycle later.

    Args:
        n: Sizing parameter; the BRAM address width is
            ``int(math.log(n, 2)) * 2``.
        datawidth: BRAM data width.
        numports: Not used by this function; the BRAM is always created
            with 2 ports.

    Returns:
        The constructed ``Module`` named 'main'.
    """
    mod = Module('main')
    clock = mod.Input('CLK')
    reset = mod.Input('RST')

    addr_bits = int(math.log(n, 2)) * 2
    ram = bram.Bram(mod, 'mybram', clock, reset, datawidth, addr_bits, 2)
    ram.disable_write(1)

    sequencer = Seq(mod, 'seq', clock, reset)

    # Write side (port 0): the write reuses the condition of the preceding
    # If via seq.then.
    write_addr = mod.Reg('waddr', 32, initval=0)
    write_value = mod.Reg('count', 32, initval=0)
    sequencer.If(write_addr < 16)(write_addr.inc(), write_value.inc())
    ram.write(0, write_addr, write_value, cond=sequencer.then)

    # Read side (port 1).
    read_addr = mod.Reg('raddr', 32, initval=0)
    acc = mod.Reg('sum', 32, initval=0)
    read_enable = make_condition(
        sequencer.Prev(1, delay=4, initval=0), read_addr < 16)
    sequencer.If(read_enable)(read_addr.inc())
    read_data, read_valid = ram.read(1, read_addr, cond=read_enable)

    sequencer.If(read_valid)(acc(acc + read_data))
    sequencer.Then().Delay(1)(Systask('display', "sum=%d", acc))

    sequencer.make_always()

    return mod
def mkMain():
    """Build the 'main' module: slave-triggered DMA vector add with checksum.

    Sequence driven by a single FSM:
      1. wait for a write request and write data on the AXI slave port;
      2. DMA-read 64 words from address 1024 into ram_a and from 2048 into
         ram_b;
      3. stream both RAMs through a dataflow adder into ram_c;
      4. DMA-write ram_c to address 3072 while a sequencer sums the words
         observed on the master write-data channel;
      5. answer a slave read request with that sum, then return to the
         initial state.

    Returns:
        The constructed ``Module`` named 'main'.
    """
    mod = Module('main')
    clock = mod.Input('CLK')
    reset = mod.Input('RST')

    slave = axi.AxiSlave(mod, 'slave', clock, reset)
    master = axi.AxiMaster(mod, 'master', clock, reset)

    src_a = bram.Bram(mod, 'ram_a', clock, reset, numports=1)
    src_b = bram.Bram(mod, 'ram_b', clock, reset, numports=1)
    dst_c = bram.Bram(mod, 'ram_c', clock, reset, numports=1)

    ctrl = FSM(mod, 'fsm', clock, reset)

    # Step 1: wait for the slave-side write (used as a start trigger).
    _req_addr, req_counter, req_valid = slave.pull_write_request(cond=ctrl)
    ctrl.If(req_valid).goto_next()

    _wr_data, _wr_mask, wr_valid, _wr_last = slave.pull_write_data(
        req_counter, cond=ctrl)
    ctrl.If(wr_valid).goto_next()

    # Step 2: load both operands by DMA.
    ext_addr = 1024
    local_addr = 0
    words = 64
    load_done = master.dma_read(src_a, ext_addr, local_addr, words, cond=ctrl)
    ctrl.If(load_done).goto_next()

    ext_addr = 1024 * 2
    load_done = master.dma_read(src_b, ext_addr, local_addr, words, cond=ctrl)
    ctrl.If(load_done).goto_next()

    # Step 3: element-wise add through dataflow streams.
    a_stream, _a_last, _a_done = src_a.read_dataflow(
        0, local_addr, words, cond=ctrl)
    b_stream, _b_last, _b_done = src_b.read_dataflow(
        0, local_addr, words, cond=ctrl)
    c_stream = a_stream + b_stream
    store_done = dst_c.write_dataflow(0, local_addr, c_stream, words, cond=ctrl)
    ctrl.goto_next()
    ctrl.If(store_done).goto_next()

    # Step 4: write the result back by DMA.
    ext_addr = 1024 * 3
    flush_done = master.dma_write(dst_c, ext_addr, local_addr, words, cond=ctrl)
    ctrl.If(flush_done).goto_next()

    # Checksum of everything accepted on the master write-data channel.
    checksum = mod.Reg('sum', 32, initval=0)
    expected_a = (1024 + 1024 + 63) * 64 // 2
    expected_b = (1024 * 2 + 1024 * 2 + 63) * 64 // 2
    expected_checksum = expected_a + expected_b

    monitor = Seq(mod, 'seq', clock, reset)
    monitor.If(ctrl.state == 0)(checksum(0))
    monitor.If(master.wdata.wvalid, master.wdata.wready)(
        checksum(checksum + master.wdata.wdata))
    monitor.If(
        master.wdata.wvalid, master.wdata.wready, master.wdata.wlast
    ).Delay(1)(
        Systask('display', "sum=%d expected_sum=%d",
                checksum, expected_checksum))

    ctrl.If(
        master.wdata.wvalid, master.wdata.wready, master.wdata.wlast
    ).Delay(1).goto_next()

    # Step 5: hand the checksum back through the slave read channel.
    _rd_addr, rd_counter, rd_valid = slave.pull_read_request(cond=ctrl)
    ctrl.If(rd_valid).goto_next()

    _push_ack, push_last = slave.push_read_data(checksum, rd_counter, cond=ctrl)
    ctrl.If(push_last).goto_next()

    # Start over.
    ctrl.goto_init()

    return mod
def mkMain():
    """Build the 'main' module: DMA-fed multiply-accumulate with two FSMs.

    ``read_fsm`` waits for an AXI slave write, loads ram_b once and then
    repeatedly loads ram_a and streams ram_a/ram_b through a dataflow
    multiplier and integrating adder, looping back to ``comp_start`` until
    ``row_count`` reaches ``length - 1``.  ``write_fsm`` stores the
    accumulated stream into ram_c (gated by ``wcond``) and DMA-writes ram_c
    to address ``1024 * 3``.  Every ram_c write is displayed.

    NOTE(review): statement order matters here -- each FSM call is attached
    to whatever state that FSM is in at the time of the call, and
    ``comp_start`` captures ``read_fsm.current`` at one specific point.

    Returns:
        The constructed ``Module`` named 'main'.
    """
    m = Module('main')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # AXI ports
    slave = axi.AxiSlave(m, 'slave', clk, rst)
    master = axi.AxiMaster(m, 'master', clk, rst)

    # a, b: source, c: result
    ram_a = bram.Bram(m, 'ram_a', clk, rst, numports=1)
    ram_b = bram.Bram(m, 'ram_b', clk, rst, numports=1)
    ram_c = bram.Bram(m, 'ram_c', clk, rst, numports=1)

    # Two independent state machines: one for the load/compute side and
    # one for the store/write-back side.
    read_fsm = FSM(m, 'read_fsm', clk, rst)
    write_fsm = FSM(m, 'write_fsm', clk, rst)

    df = dataflow.DataflowManager(m, clk, rst)
    # df.enable_draw_graph()

    read_fsm.goto_next()

    # Iteration counter for the compute loop; cleared in this read_fsm state.
    row_count = m.Reg('row_count', 32, initval=0)
    read_fsm(
        row_count(0)
    )

    # wait for slave request
    slave_addr, slave_counter, slave_valid = slave.pull_write_request(
        cond=read_fsm)
    read_fsm.If(slave_valid).goto_next()

    data, mask, valid, last = slave.pull_write_data(
        slave_counter, cond=read_fsm)
    read_fsm.If(valid).goto_next()

    # write_fsm advances conditioned on read_fsm.
    # NOTE(review): presumably this holds write_fsm until read_fsm reaches
    # the state current at this point -- confirm against the FSM API.
    write_fsm.If(read_fsm).goto_next()

    # computation
    # Load ram_b once, before the loop entry point captured below.
    master_addr = 1024 * 2
    ram_addr = 0
    length = 16
    dma_done = master.dma_read(
        ram_b, master_addr, ram_addr, length, cond=read_fsm)
    read_fsm.If(dma_done).goto_next()

    # Loop entry: the state index that read_fsm jumps back to below.
    comp_start = read_fsm.current

    # Load ram_a (repeated on every loop iteration).
    master_addr = 1024
    ram_addr = 0
    length = 16
    dma_done = master.dma_read(
        ram_a, master_addr, ram_addr, length, cond=read_fsm)
    read_fsm.If(dma_done).goto_next()

    # Stream both RAMs out as dataflow variables in the same read_fsm state.
    adata, alast, adone = ram_a.read_dataflow(
        0, ram_addr, length, cond=read_fsm)
    bdata, blast, bdone = ram_b.read_dataflow(
        0, ram_addr, length, cond=read_fsm)
    read_fsm.goto_next()

    # Multiply element-wise and accumulate; the accumulator reset is driven
    # by wcond (counter == 0) taken through prev(1).
    mul = adata * bdata
    mul_count = df.Counter(maxval=length)
    wcond = mul_count == 0
    cdata = df.Iadd(mul, reset=wcond.prev(1))

    # Count the iteration, then either loop back or leave the loop.
    read_fsm(
        row_count.inc()
    )
    read_fsm.If(row_count < length - 1).goto(comp_start)
    read_fsm.If(row_count == length - 1).goto_next()

    # Store side: write the accumulated stream into ram_c, gated by wcond.
    done = ram_c.write_dataflow(
        0, 0, cdata, length, cond=write_fsm, when=wcond)
    write_fsm.goto_next()
    write_fsm.If(done).goto_next()

    # Write ram_c back through the AXI master.
    master_addr = 1024 * 3
    dma_done = master.dma_write(
        ram_c, master_addr, ram_addr, length, cond=write_fsm)
    write_fsm.If(dma_done).goto_next()

    # Both FSMs return to their initial states; read_fsm is conditioned on
    # write_fsm.
    read_fsm.If(write_fsm).goto_init()
    write_fsm.goto_init()

    # Debug output: display every write on ram_c port 0.
    seq = Seq(m, 'seq', clk, rst)
    seq.If(ram_c[0].wenable)(
        Systask('display', '[%d]<-%d', ram_c[0].addr, ram_c[0].wdata)
    )

    return m
def mkMain(n=128, datawidth=32, numports=2):
    """Build the 'main' module: BRAM -> (x + 100) dataflow -> BRAM, no manager.

    ``xfsm`` fills BRAM port 0 with 16 counter values, reads them back and
    pushes each valid word into dataflow variable ``x``.  ``yfsm`` pulls the
    results of ``y = x + 100``, writes them through BRAM port 1 and finally
    reads them back, displaying each value.  The dataflow is built from
    explicit ``dataflow.Variable`` / ``dataflow.Dataflow`` objects instead
    of a ``DataflowManager``.

    Args:
        n: Sizing parameter; the BRAM address width is
            ``int(math.log(n, 2)) * 2``.
        datawidth: BRAM data width.
        numports: Not used by this function; the BRAM is always created
            with 2 ports.

    Returns:
        The constructed ``Module`` named 'main'.
    """
    m = Module('main')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    addrwidth = int(math.log(n, 2)) * 2
    mybram = bram.Bram(m, 'mybram', clk, rst, datawidth, addrwidth, 2, nodataflow=True)

    xfsm = FSM(m, 'xfsm', clk, rst)
    xaddr = m.Reg('xaddr', 32, initval=0)
    xcount = m.Reg('xcount', 32, initval=0)

    # dataflow variables without manager
    x = dataflow.Variable('xdata', 'xvalid', 'xready')
    y = x + 100
    # NOTE(review): the third name is 'yread' while x uses 'xready'; this
    # looks like a typo for 'yready' but is left untouched here because it
    # is a runtime signal name -- confirm.
    y.output('ydata', 'yvalid', 'yread')
    df = dataflow.Dataflow(y)
    df.implement(m, clk, rst)

    # Initialization
    xfsm(
        xaddr(0),
        xcount(0),
    )
    xfsm.goto_next()

    # write data to BRAM
    step = 16
    mybram.write(0, xaddr, xcount, cond=xfsm)
    xfsm(
        xaddr.inc(),
        xcount.inc()
    )
    # After the last step, reset both registers and move on.
    xfsm.If(xcount == step - 1)(
        xaddr(0),
        xcount(0)
    )
    xfsm.Then().goto_next()

    # read data from BRAM
    # The read is only issued while xaddr is below step.
    read_data, read_valid = mybram.read(0, xaddr, cond=(xfsm, xaddr < step))

    # BRAM -> dataflow
    # xack is returned by the write but not used afterwards.
    xack = x.write(read_data, cond=read_valid)
    xfsm(
        xaddr.inc()
    )
    # xfsm.Prev(xaddr, 2) is displayed alongside the data.
    # NOTE(review): presumably this is xaddr delayed by two cycles to line
    # up with the read result -- confirm against the FSM.Prev API.
    xfsm.If(read_valid)(
        Systask('display', 'BRAM0[%d] = %d', xfsm.Prev(xaddr, 2), read_data)
    )
    xfsm.If(xfsm.Prev(xaddr, 1) == step).goto_next()

    # write result to BRAM
    yfsm = FSM(m, 'yfsm', clk, rst)
    yaddr = m.Reg('yaddr', 32, initval=0)
    yfsm(
        yaddr(0)
    )
    yfsm.goto_next()

    # read from dataflow
    rdata, rvalid = y.read(yfsm)

    # dataflow -> BRAM
    # Port 1 is written only when a dataflow result is valid.
    mybram.write(1, yaddr, rdata, cond=(yfsm, rvalid))
    yfsm.If(rvalid)(
        yaddr.inc()
    )
    yfsm.If(yaddr == step - 1)(
        yaddr(0)
    )
    yfsm.Then().goto_next()

    # read data from BRAM
    # read_data / read_valid are rebound here to the port-1 read results.
    read_data, read_valid = mybram.read(1, yaddr, cond=(yfsm, yaddr < step))
    yfsm.If(yaddr < step)(
        yaddr.inc()
    )
    yfsm.If(read_valid)(
        Systask('display', 'BRAM1[%d] = %d', yfsm.Prev(yaddr, 2), read_data)
    )

    return m
def mkStencil(n=16, size=3, datawidth=32, point=16, coe_test=False):
    """Build the 'stencil' module around a 2D stencil pipeline instance.

    The module owns ``size`` source BRAMs and one destination BRAM, each
    with a second port connected to an external BRAM slave interface.
    ``read_fsm`` waits for ``start``, streams ``n`` words out of every
    source BRAM into the pipeline instance, then waits for ``done``.
    ``write_fsm`` stores the pipeline output into the destination BRAM and
    raises ``done`` after ``n`` outputs have been counted.

    Args:
        n: Number of words streamed per run; also sizes the address widths
            (``int(math.log(n, 2))`` external, twice that internal).
        size: Stencil size; number of source BRAMs / pipeline input ports.
        datawidth: Data width of the BRAMs and of ``odata``.
        point: Fixed-point position forwarded to the pipeline (and to the
            constant coefficients when ``coe_test`` is set).
        coe_test: When true, use a ``size`` x ``size`` matrix of constant-1
            coefficients and pass ``point=0`` to the pipeline.

    Returns:
        The constructed ``Module`` named 'stencil'.
    """
    m = Module('stencil')

    addrwidth = int(math.log(n, 2))

    clk = m.Input('CLK')
    rst = m.Input('RST')

    start = m.Input('start')
    busy = m.OutputReg('busy', initval=0)

    # Handshake from write_fsm back to read_fsm.
    done = m.TmpReg(initval=0)

    # external BRAM I/F
    ext_src_brams = [
        bram.BramSlaveInterface(m, 'ext_src_bram%d' % i,
                                datawidth=datawidth, addrwidth=addrwidth)
        for i in range(size)
    ]
    ext_dst_bram = bram.BramSlaveInterface(m, 'ext_dst_bram',
                                           datawidth=datawidth,
                                           addrwidth=addrwidth)

    # BRAM (internal memories use twice the external address width)
    addrwidth = int(math.log(n, 2)) * 2

    src_brams = [
        bram.Bram(m, 'src_bram%d' % i, clk, rst,
                  datawidth=datawidth, addrwidth=addrwidth, numports=2)
        for i in range(size)
    ]
    dst_bram = bram.Bram(m, 'dst_bram', clk, rst,
                         datawidth=datawidth, addrwidth=addrwidth, numports=2)

    # connect BRAM I/Fs: port 1 of every memory goes to the outside
    for src_bram, ext_src_bram in zip(src_brams, ext_src_brams):
        src_bram[1].connect(ext_src_bram)

    dst_bram[1].connect(ext_dst_bram)

    # read FSM
    read_fsm = FSM(m, 'read_fsm', clk, rst)
    read_count = m.Reg('read_count', 32, initval=0)
    read_addr = m.Reg('read_addr', 32, initval=0)

    # Idle state: hold everything cleared until start is asserted.
    read_fsm(read_addr(0), read_count(0), busy(0))
    read_fsm.If(start)(busy(1))
    read_fsm.Then().goto_next()

    # Streaming state: read the same address from every source BRAM.
    read_fsm(read_addr.inc(), read_count.inc())

    idata = []
    ivalid = []
    for src_bram in src_brams:
        src_bram.disable_write(0)
        rdata, rvalid = src_bram.read(0, read_addr, read_fsm)
        idata.append(rdata)
        ivalid.append(rvalid)

    read_fsm.If(read_count == n - 1)(read_addr(0), read_count(0))
    read_fsm.Then().goto_next()

    # Wait for the write side to finish, then return to idle.
    read_fsm.If(done)(busy(0))
    read_fsm.Then().goto_init()

    read_fsm.make_always()

    # instance
    odata = m.Wire('odata', datawidth)
    ovalid = m.Wire('ovalid')

    ports = []
    ports.append(('CLK', clk))
    ports.append(('RST', rst))

    for i, (d, v) in enumerate(zip(idata, ivalid)):
        ports.append(('idata%d' % i, d))
        ports.append(('ivalid%d' % i, v))

    ports.append(('odata', odata))
    ports.append(('ovalid', ovalid))

    coe = None
    if coe_test:
        coe = [[dataflow.Constant(1, point=point) for i in range(size)]
               for j in range(size)]
        # NOTE(review): 'point = 0' is taken to belong to this branch --
        # confirm against the original layout.
        point = 0

    # Pass the caller's size through: the port list above and the
    # coefficient matrix are both built for `size` inputs, so a hard-coded
    # 3 here would mismatch the instance for any other size.
    st = mkStencilPipeline2D(size=size, width=datawidth, point=point, coe=coe)
    m.Instance(st, 'inst_stencil', ports=ports)

    # Number of leading outputs to skip / first destination address.
    skip_offset = size // 2

    # write FSM
    write_fsm = FSM(m, 'write_fsm', clk, rst)
    write_count = m.Reg('write_count', 32, initval=0)
    write_addr = m.Reg('write_addr', 32, initval=skip_offset)

    write_fsm(done(0))
    write_fsm.If(Ands(ovalid, write_count > skip_offset))(write_addr.inc())
    # The BRAM write reuses the condition of the preceding If.
    dst_bram.write(0, write_addr, odata, write_fsm.then)
    write_fsm.If(ovalid)(write_count.inc())
    write_fsm.If(write_count == n)(
        write_count(0), write_addr(skip_offset), done(1))
    write_fsm.Then().goto_init()

    write_fsm.make_always()

    return m