class _SimpleCB(m.Circuit):
    """Connection box driven by a single configuration register.

    `generate_mux` is handed the `I` array, the low `sel_bits` bits of the
    configuration register and a zero constant; its result drives `O`.
    The register can be written and read back through the config interface.
    """
    name = make_name(width, num_tracks)
    IO = [
        "I", m.In(m.Array(num_tracks, T)),
        "O", m.Out(T),
    ]
    IO += ConfigInterface(CONFIG_ADDR_WIDTH, CONFIG_DATA_WIDTH)
    IO += m.ClockInterface(has_async_reset=True)

    @classmethod
    def definition(io):
        """Build the config register, the read-back path and the output mux."""
        config_reg = mantle.Register(
            CONFIG_DATA_WIDTH,
            init=config_reset,
            has_ce=True,
            has_async_reset=True,
        )

        # This box is addressed when bits [24:32] of config_addr are all 0.
        addr_selected = m.bits(0, 8) == io.config_addr[24:32]

        # Only latch config_data on an enabled write that addresses this box.
        write_enable = m.bit(io.config_en) & addr_selected
        config_reg(io.config_data, CE=write_enable)

        # read_data mirrors the register when addressed, otherwise it is 0.
        read_back = mantle.mux(
            [m.uint(0, CONFIG_DATA_WIDTH), config_reg.O], addr_selected)
        m.wire(io.read_data, read_back)

        # NOTE: This is not robust for a mux that needs more than 32 select
        # bits (i.e. >= 2^32 inputs). That is unlikely to happen, but this
        # code is not general.
        selected = generate_mux(
            io.I, config_reg.O[:sel_bits], m.uint(0, width))
        m.wire(selected, io.O)
def test_include_verilog(target, simulator):
    """Check that an externally defined Verilog module can be pulled in.

    A `main` circuit wraps a declared-only `SB_DFF`; the flip-flop's
    implementation is supplied first as an explicit Verilog library file and
    then (verilator only) through an include directory.

    Args:
        target: fault backend name passed to `compile_and_run`.
        simulator: optional simulator name; forwarded only when not None.
    """
    SB_DFF = m.DeclareCircuit('SB_DFF', "D", m.In(m.Bit), "Q", m.Out(m.Bit),
                              "C", m.In(m.Clock))
    main = m.DefineCircuit('main', "I", m.In(m.Bit), "O", m.Out(m.Bit),
                           *m.ClockInterface())
    ff = SB_DFF()
    m.wire(ff.D, main.I)
    m.wire(ff.Q, main.O)
    m.EndDefine()

    tester = fault.Tester(main, main.CLK)
    tester.poke(main.CLK, 0)
    tester.poke(main.I, 1)
    tester.eval()
    tester.expect(main.O, 0)
    tester.step(2)
    tester.expect(main.O, 1)

    # Resolve fixtures relative to this test file rather than the current
    # working directory, so the test does not depend on where pytest is run.
    dir_path = os.path.dirname(os.path.realpath(__file__))
    sb_dff_filename = pathlib.Path(dir_path) / "sb_dff_sim.v"

    kwargs = {}
    if simulator is not None:
        kwargs["simulator"] = simulator

    with tempfile.TemporaryDirectory() as tmp_dir:
        tester.compile_and_run(target=target, directory=tmp_dir,
                               include_verilog_libraries=[sb_dff_filename],
                               **kwargs)

    if target == "verilator":
        # Should work by including the tests/ directory which contains the
        # verilog file SB_DFF.v
        with tempfile.TemporaryDirectory() as tmp_dir:
            tester.compile_and_run(target=target, directory=tmp_dir,
                                   include_directories=[dir_path], **kwargs)
class _Aggregator(m.Circuit):
    """Collect `mem_word_width` input words into one wide output word.

    Ports:
        INPUT_PIXELS: one `word_width`-bit word per cycle.
        AGGREGATED_OUTPUT: array of `mem_word_width` words taken from the
            register chain.
        VALID: constant 1 when `mem_word_width == 1`; otherwise the
            registered result of comparing the counter with
            `mem_word_width - 1`.
    """
    name = 'Aggregator_' + str(word_width) + '_' + str(mem_word_width)
    IO = [
        'INPUT_PIXELS', m.In(m.Bits[word_width]),
        'AGGREGATED_OUTPUT',
        m.Out(m.Array[mem_word_width, m.Bits[word_width]]),
        'VALID', m.Out(m.Bit)
    ] + m.ClockInterface()

    @classmethod
    def definition(agg):
        """Wire the register chain, the word counter and the VALID flag."""
        # stores input pixels from each cycle
        regs = [Register(word_width) for i in range(mem_word_width)]
        regs[0].I <= agg.INPUT_PIXELS

        if mem_word_width == 1:
            agg.VALID <= 1
        else:
            # Number of bits needed to count 0 .. mem_word_width - 1.
            # Integer arithmetic is used instead of ceil(log(x, 2)) because
            # the floating-point logarithm can overshoot on exact powers of
            # two (e.g. log(2**29, 2) == 29.000000000000004).
            counter_width = (mem_word_width - 1).bit_length()

            # keep track of number of input pixels so far
            counter = CounterModM(mem_word_width, counter_width)

            # output VALID on same clock when data is in AGGREGATED_OUTPUT
            valid_dff = DFF()

            # valid when number of input pixels is same as mem_word_width
            valid_dff.I <= (counter.O == mem_word_width - 1)
            agg.VALID <= valid_dff.O

        # output all mem_word_width INPUT_PIXELS so far
        a = m.scan(regs, scanargs={'I': 'O'})
        agg.AGGREGATED_OUTPUT <= a.O
class _DDS(m.Circuit):
    """Accumulator: each cycle the `n`-bit register is fed its value plus `I`.

    `O` is driven by the register's output.
    """
    name = 'DDS{}'.format(n)
    IO = [
        'I', m.In(m.UInt(n)),
        "O", m.Out(m.UInt(n)),
    ] + m.ClockInterface()

    @classmethod
    def definition(io):
        """Close the accumulate loop around a single register."""
        accumulator = Register(n)
        # Next state is the current state plus the input increment.
        next_value = m.uint(accumulator.O) + io.I
        m.wire(accumulator(next_value), io.O)
class _Counter(m.Circuit):
    """Free-running `n`-bit counter; `O` is the register output."""
    name = f'Counter{n}'
    IO = ["O", m.Out(m.UInt[n])] + m.ClockInterface()

    @classmethod
    def definition(io):
        """Feed the register with its own value plus one."""
        count_reg = mantle.Register(n)
        incremented = m.uint(count_reg.O) + m.uint(1, n)
        io.O <= count_reg(incremented)
class ConfigReg(m.Circuit):
    """Two-bit register with a clock-enable, exposed as D -> Q."""
    IO = [
        "D", m.In(m.Bits[2]),
        "Q", m.Out(m.Bits[2]),
    ] + m.ClockInterface(has_ce=True)

    @classmethod
    def definition(io):
        """Instantiate the register named "conf_reg" and connect D, CE and Q."""
        conf_reg = mantle.Register(2, has_ce=True, name="conf_reg")
        stored = conf_reg(io.D, CE=io.CE)
        io.Q <= stored
class Add210(m.Circuit):
    """Pass `I` through a `RigelMod` instance and add 10 to its output."""
    IO = [
        "I", m.In(m.UInt(8)),
        "O", m.Out(m.UInt(8)),
    ] + m.ClockInterface()

    @classmethod
    def definition(io):
        """Wire I -> RigelMod -> (+10) -> O with the module always enabled."""
        rigel = RigelMod()
        m.wire(io.I, rigel.process_input)
        plus_ten = rigel.process_output + m.uint(10, 8)
        m.wire(io.O, plus_ten)
        # The wrapped module's clock-enable is tied high.
        m.wire(rigel.CE, m.bit(True))
class ShiftRegister(m.Circuit):
    """Chain of `N` `Register4` stages between `I` and `O`."""
    name = "ShiftRegister"
    IO = ["I", m.In(T), "O", m.Out(T)] + m.ClockInterface()

    @classmethod
    def definition(io):
        """Clock every stage and connect them head to tail."""
        stages = [Register4() for _ in range(N)]
        m.wireclock(io, stages)

        # "in" is a Python keyword, so the port has to be fetched by name.
        head_input = getattr(stages[0], "in")
        m.wire(io.I, head_input)

        # Connect each stage's "out" to the next stage's "in".
        m.fold(stages, foldargs={"in": "out"})

        tail = stages[-1]
        m.wire(tail.out, io.O)
class _Matcher(magma.Circuit):
    """Wrap the circuit produced by `regex.to_circuit` for the `char` input.

    The first value returned by `to_circuit` is tied to constant 1 and the
    second one drives `match`.
    """
    name = "Matcher"
    IO = [
        "char", magma.In(CharType),
        "match", magma.Out(magma.Bit),
    ] + magma.ClockInterface()

    @classmethod
    def definition(io):
        """Build the regex circuit and hook up its two endpoints."""
        start, matched = regex.to_circuit(io.char)
        magma.wire(1, start)
        magma.wire(matched, io.match)
class Configurable(m.Circuit):
    """32-bit register written through a config bus at address 1.

    `O` always reflects the register output.
    """
    IO = [
        "config_addr", m.In(m.Bits(32)),
        "config_data", m.In(m.Bits(32)),
        "config_en", m.In(m.Enable),
        "O", m.Out(m.Bits(32)),
    ] + m.ClockInterface()

    @classmethod
    def definition(io):
        """Load config_data when config_addr == 1 and config_en is high."""
        config_reg = mantle.Register(32, has_ce=True)
        write_enable = \
            (io.config_addr == m.bits(1, 32)) & m.bit(io.config_en)
        config_reg(io.config_data, CE=write_enable)
        m.wire(config_reg.O, io.O)
class _ShiftRegister(m.Circuit):
    """Serial-in/serial-out shift register built from `n` flip-flops."""
    name = 'ShiftRegister_{}_{}_{}_{}'.format(n, init, has_ce, has_reset)
    IO = [
        'I', m.In(m.Bit),
        'O', m.Out(m.Bit),
    ] + m.ClockInterface(has_ce, has_reset)

    @classmethod
    def definition(siso):
        """Braid the flip-flops into a chain and connect I, O and the clock."""
        flops = mantle.FFs(n, init=init, has_ce=has_ce, has_reset=has_reset)
        # Each flop's "O" is connected to the next flop's "I".
        chain = m.braid(flops, foldargs={"I": "O"})
        chain(siso.I)
        m.wire(chain.O, siso.O)
        m.wireclock(siso, chain)
class SimpleALU(m.Circuit):
    """16-bit ALU whose operation is chosen by a 2-bit config register.

    The mux inputs, in order, are a + b, a - b, a * b and a ^ b.
    """
    IO = [
        "a", m.In(m.UInt[16]),
        "b", m.In(m.UInt[16]),
        "c", m.Out(m.UInt[16]),
        "config_data", m.In(m.Bits[2]),
        "config_en", m.In(m.Enable),
    ] + m.ClockInterface()

    @classmethod
    def definition(io):
        """Latch the opcode and mux the four candidate results onto `c`."""
        config_reg = ConfigReg(name="config_reg")
        opcode = config_reg(io.config_data, CE=io.config_en)

        # All four results are computed; the opcode picks one of them.
        candidates = [
            io.a + io.b,
            io.a - io.b,
            io.a * io.b,
            io.a ^ io.b,
        ]
        io.c <= mantle.mux(candidates, opcode)
class _ConfigRegister(m.Circuit):
    """Address-decoded register: loads `I` when `addr` matches `address`.

    The load additionally requires the circuit's `CE` input to be high.
    """
    name = get_name()
    IO = [
        "I", m.In(T),
        "O", m.Out(T),
        "addr", m.In(AddressType),
    ]
    IO += m.ClockInterface(has_ce=True, has_reset=has_reset)

    @classmethod
    def definition(io):
        """Create the register, gate its enable, and forward reset if any."""
        storage = mantle.Register(
            n=width, init=0, has_ce=True, has_reset=has_reset)

        # Enable the register only for an enabled access to our address.
        load = (io.addr == address) & m.bit(io.CE)
        m.wire(storage(io.I, CE=load), io.O)

        if has_reset:
            m.wire(io.RESET, storage.RESET)
class MagicPacketTracker(m.Circuit):
    """Up/down counter with gated increment and decrement.

    Ports:
        push, pop, captured: control inputs for the count update.
        cnt: current register value.
        next_cnt: value that will be loaded into the register next.
        rst: reset for the count register.
    """
    name = "MagicPacketTracker"
    IO = [
        "push", m.In(m.Bit),
        "pop", m.In(m.Bit),
        "captured", m.In(m.Bit),
        "cnt", m.Out(m.UInt(CNTWID)),
        "next_cnt", m.Out(m.UInt(CNTWID)),
        "rst", m.In(m.Reset),
    ] + m.ClockInterface()

    @classmethod
    def definition(io):
        """Build the count register and its increment/decrement logic."""
        CountRegister = DefineRegister(
            CNTWID, init=0, has_ce=False, has_reset=True, _type=m.UInt)
        count_reg = CountRegister(name="pop_cnt")

        # clock and reset
        m.wireclock(io, count_reg)
        m.wire(count_reg.RESET, io.rst)

        one = m.uint(1, CNTWID)

        # Increment when below DEPTH, a push is requested and the packet has
        # not been captured yet.
        below_depth = count_reg.O < m.uint(DEPTH, CNTWID)
        push_allowed = below_depth & (io.push)
        do_incr = m.bit(push_allowed & (~io.captured))
        incr_amount = m.uint(one & repeat(do_incr, CNTWID))

        # Intermediate count after the (optional) increment.
        after_push = m.uint(count_reg.O + incr_amount)

        # Decrement when that intermediate count is non-zero and a pop is
        # requested.
        non_zero = after_push > m.uint(0, CNTWID)
        do_decr = m.bit(non_zero & (io.pop))
        decr_amount = m.uint(m.uint(1, CNTWID) & repeat(do_decr, CNTWID))

        # next state
        next_count = after_push - decr_amount
        m.wire(count_reg.I, next_count)

        # outputs
        m.wire(count_reg.O, io.cnt)
        m.wire(next_count, io.next_cnt)
class testReg(m.Circuit):
    """One-bit register test wrapper with an explicit `clk` port.

    The IO list is "clk", "In0", "Out0" followed by whatever
    `m.ClockInterface` contributes for the selected CE / async-reset options.
    """
    name = "test"
    IO = [
        "clk", m.In(m.Clock),
        "In0", m.In(m.Bits[1]),
        "Out0", m.Out(m.Bits[1]),
    ]
    IO += m.ClockInterface(
        has_ce=has_ce,
        has_async_reset=has_async_reset,
        has_async_resetn=has_async_resetn,
    )

    @classmethod
    def definition(io):
        """Instantiate the register and connect clock, data in and data out."""
        register = mantle.Register(
            1,
            has_ce=has_ce,
            has_async_reset=has_async_reset,
            has_async_resetn=has_async_resetn,
        )
        m.wire(register.CLK, io.clk)
        m.wire(register.I, io.In0)
        m.wire(register.O, io.Out0)
class Aggregator(m.Circuit):
    """Collect `mem_word_width` input words into one wide output word.

    Ports:
        in_pixels: one `word_width`-bit word per cycle.
        valid_in: qualifies the `valid_out` and `next_full` outputs.
        agg_out: array of `mem_word_width` words from the register chain.
        valid_out: constant 1 when `mem_word_width == 1`; otherwise
            `valid_in` ANDed with the registered counter comparison.
        next_full: `valid_in` when `mem_word_width == 1`; otherwise
            `valid_in` ANDed with the unregistered counter comparison.
    """
    name = 'Aggregator_' + str(word_width) + '_' + str(mem_word_width)
    IO = [
        'in_pixels', m.In(m.Bits[word_width]),
        'valid_in', m.In(m.Bit),
        'agg_out', m.Out(m.Array[mem_word_width, m.Bits[word_width]]),
        'valid_out', m.Out(m.Bit),
        'next_full', m.Out(m.Bit)
    ] + m.ClockInterface()

    # Combinational AND used to gate a condition with valid_in.
    @m.circuit.combinational
    def check_valid_in(valid_in: m.Bit, check: m.Bit) -> m.Bit:
        return valid_in & check

    @classmethod
    def definition(agg):
        """Wire the register chain, the word counter and the status flags."""
        # stores input pixels from each cycle
        regs = [Register(word_width) for i in range(mem_word_width)]
        regs[0].I <= agg.in_pixels

        # Bits needed to count 0 .. mem_word_width - 1 (ceiling of log2).
        # A floor (int(log2(x))) would be one bit short for widths that are
        # not a power of two; for powers of two both forms agree.
        mem_word_width_bits = (mem_word_width - 1).bit_length()

        if mem_word_width == 1:
            agg.valid_out <= 1
            m.wire(agg.next_full, agg.valid_in)
        else:
            # keep track of number of input pixels so far
            counter = CounterModM(mem_word_width, mem_word_width_bits)

            # output valid_out on same clock when data is in agg_out
            valid_dff = DFF()

            # registered flag: counter reached mem_word_width - 2
            valid_dff.I <= (counter.O == mem_word_width - 2)

            m.wire(agg.valid_out,
                   agg.check_valid_in(agg.valid_in, valid_dff.O))

            m.wire(
                agg.next_full,
                agg.check_valid_in(agg.valid_in,
                                   (counter.O == mem_word_width - 2)))

        # output all mem_word_width in_pixels so far
        a = m.scan(regs, scanargs={'I': 'O'})
        agg.agg_out <= a.O
class Register(m.Circuit):
    """Register built on a CoreIR reg with optional sync reset and CE.

    Reset and clock-enable are implemented as muxes in front of the
    underlying register's input; clock-enable is applied last.
    """
    name = (
        f"Register__has_ce_{has_ce}__has_reset_{has_reset}__"
        f"has_async_reset__{has_async_reset}__"
        f"type_{_type.__name__}__n_{n}"
    )
    IO = ["I", m.In(T), "O", m.Out(T)]
    IO += m.ClockInterface(
        has_ce=has_ce,
        has_reset=has_reset,
        has_async_reset=has_async_reset,
    )

    @classmethod
    def definition(io):
        """Instantiate the CoreIR register and its input-selection muxes."""
        core_reg = DefineCoreirReg(n, init, has_async_reset, _type)()

        next_value = io.I
        if has_reset:
            # RESET high selects the init constant over the data input.
            next_value = mantle.mux([io.I, m.bits(init, n)], io.RESET)
        if has_ce:
            # CE low recirculates the current register value.
            next_value = mantle.mux([core_reg.O, next_value], io.CE)

        m.wire(next_value, core_reg.I)
        m.wire(io.O, core_reg.O)
class FIFO(m.Circuit):
    """Ready/valid FIFO backed by a RAM with wrap-bit pointers.

    Each pointer is `addr_width + 1` bits wide: the low `addr_width` bits
    address the RAM and the top bit distinguishes "full" from "empty" when
    the addresses coincide.
    """
    IO = ["data_in", data_in_type, "data_out", data_out_type]
    IO += m.ClockInterface()

    @classmethod
    def definition(io):
        """Build the storage, the pointers and the handshake logic."""
        addr_width = m.bitutils.clog2(depth)
        buffer = mantle.RAM(addr_width, flat_length(io.data_in.data))

        # pack data into bits
        buffer.WDATA <= flatten_fields_to_bits(io.data_in.data)

        # unpack bits into tuple
        io.data_out.data <= unflatten_bits_to_fields(
            io.data_out.data, buffer.RDATA)

        read_pointer = mantle.Register(addr_width + 1)
        write_pointer = mantle.Register(addr_width + 1)
        buffer.RADDR <= read_pointer.O[:addr_width]
        buffer.WADDR <= write_pointer.O[:addr_width]

        # Full: same address, different wrap bit.
        full = \
            (read_pointer.O[:addr_width] == write_pointer.O[:addr_width]) \
            & \
            (read_pointer.O[addr_width] != write_pointer.O[addr_width])

        # Empty: pointers (including the wrap bit) are identical. The
        # comparison must be made on the register outputs, not on the
        # register instances themselves.
        empty = read_pointer.O == write_pointer.O

        write_valid = io.data_in.valid & ~full
        read_valid = io.data_out.ready & ~empty

        io.data_in.ready <= ~full

        # Advance the write pointer only on an accepted write.
        buffer.WE <= write_valid
        write_pointer.I <= mantle.mux(
            [write_pointer.O, m.uint(write_pointer.O) + 1], write_valid)

        # Advance the read pointer only on a successful read.
        io.data_out.valid <= read_valid
        read_pointer.I <= mantle.mux(
            [read_pointer.O, m.uint(read_pointer.O) + 1], read_valid)
class FIFO(m.Circuit):
    """Enqueue/dequeue FIFO using two pointers and an explicit full flag.

    Ports:
        enq_val / enq_rdy: enqueue handshake; `enq_rdy` is the inverted
            full flag.
        deq_val / deq_rdy: dequeue handshake; `deq_val` is the inverted
            empty condition.
        enq_dat / deq_dat: data written to / read from the memory.
    """
    IO = [
        "enq_val", m.In(m.Bit),
        "enq_rdy", m.Out(m.Bit),
        "deq_val", m.Out(m.Bit),
        "deq_rdy", m.In(m.Bit),
        "enq_dat", m.In(T),
        "deq_dat", m.Out(T),
    ] + m.ClockInterface()

    @classmethod
    def definition(io):
        """Build pointer registers, the full flag, and the backing memory."""
        enq_ptr = mantle.Register(address_width)
        deq_ptr = mantle.Register(address_width)
        full_flag = mantle.FF()

        # Accept a write when not full; the queue is empty when it is not
        # full and both pointers coincide.
        do_enq = ~full_flag.O & io.enq_val
        is_empty = ~full_flag.O & (enq_ptr.O == deq_ptr.O)
        do_deq = io.deq_rdy & ~is_empty

        deq_ptr_inc = m.uint(deq_ptr.O) + 1
        enq_ptr_inc = m.uint(enq_ptr.O) + 1

        # Without a filling enqueue, the flag holds its value unless a
        # dequeue happens while full, which clears it.
        cleared_on_deq = mantle.mux(
            [full_flag.O, m.bit(False)], do_deq & full_flag.O)
        # An enqueue without a dequeue that makes the write pointer catch up
        # with the read pointer sets the flag.
        full_next = mantle.mux(
            [cleared_on_deq, m.bit(True)],
            do_enq & ~do_deq & (enq_ptr_inc == deq_ptr.O))

        # Pointers advance only when their operation actually happens.
        enq_ptr(mantle.mux([enq_ptr.O, enq_ptr_inc], do_enq))
        deq_ptr(mantle.mux([deq_ptr.O, deq_ptr_inc], do_deq))
        full_flag(full_next)

        memory = mantle.DefineMemory(height, width)()
        m.wire(memory.RADDR, deq_ptr.O)
        m.wire(memory.RDATA, io.deq_dat)
        m.wire(memory.WADDR, enq_ptr.O)
        m.wire(memory.WDATA, io.enq_dat)
        m.wire(memory.WE, do_enq)

        m.wire(io.enq_rdy, ~full_flag.O)
        m.wire(io.deq_val, ~is_empty)
class SimpleALU(m.Circuit):
    """16-bit ALU whose operation is chosen by a 2-bit config register.

    The mux inputs, in order, are a + b, a - b, a * b and b - a.
    """
    IO = [
        "a", m.In(m.UInt(16)),
        "b", m.In(m.UInt(16)),
        "c", m.Out(m.UInt(16)),
        "config_data", m.In(m.Bits(2)),
        "config_en", m.In(m.Enable),
    ] + m.ClockInterface()

    @classmethod
    def definition(io):
        """Latch the opcode and mux the four candidate results onto `c`."""
        config_reg = ConfigReg(name="config_reg")
        opcode = config_reg(io.config_data, CE=io.config_en)

        # udiv is not implemented, so the fourth slot uses an arbitrary
        # operation (b - a) instead of a / b.
        candidates = [
            io.a + io.b,
            io.a - io.b,
            io.a * io.b,
            io.b - io.a,
        ]
        io.c <= mantle.mux(candidates, opcode)
width = 16 TIN = m.Array(width, m.BitIn) TOUT = m.Array(width, m.Out(m.Bit)) # Line Buffer interface inType = m.Array(1, m.Array(1, TIN)) # one pixel in per clock outType = m.Array(b, m.Array(a, TOUT)) # downscale window imgType = m.Array(im_h, m.Array(im_w, TIN)) # image dimensions # Reduce interface inType2 = m.In(m.Array(a * b, TIN)) outType2 = TOUT # Top level module: line buffer input, reduce output args = ['I', inType, 'O', outType2, 'WE', m.BitIn, 'V', m.Out(m.Bit)] + \ m.ClockInterface(False, False) dscale = m.DefineCircuit('Downscale', *args) # Line buffer declaration lb = Linebuffer(cirb, inType, outType, imgType, True) m.wire(lb.I, dscale.I) m.wire(lb.wen, dscale.WE) # Reduce declaration red = ReduceParallel(cirb, samples, renameCircuitForReduce(DeclareAdd(width))) # additive identity coreirConst = DefineCoreirConst(width, 0)() # select 16 samples to keep k = 0 for i in [0, 3, 5, 8]:
class Scoreboard(m.Circuit):
    """Scoreboard combining a magic-packet tracker with one or more FIFOs.

    Without an arbiter there is a single FIFO driven directly by `push` and
    `pop`. With an arbiter there is one FIFO per requester and a DWRR
    arbiter generates the pops. `data_out_vld` is asserted when the enable
    register is set, the tracker's `cnt` is 1 and its `next_cnt` is 0.
    """
    # NOTE(review): this class-level NUM_REQS is what the IO list below sees;
    # `definition` resolves NUM_REQS from the enclosing scope instead --
    # confirm the two are meant to agree.
    NUM_REQS = 4
    name = "Scoreboard"
    assert not ARBITER or NUM_REQS is not None, "If using arbiter, need to supply NUM_REQS"
    assert not ARBITER or QWID is not None, "If using arbiter, need to supply QWID"
    if ARBITER:
        IO = [
            "push", m.In(m.Bits(NUM_REQS)),
            "start", m.In(m.Bit),
            "rst", m.In(m.Reset),
            # include blk sometime in the future
            "data_in", m.In(m.Array(N=NUM_REQS, T=m.Bits(DATAWID))),
            "input_quantums", m.In(m.Array(N=NUM_REQS, T=m.UInt(QWID))),
            "data_out_vld", m.Out(m.Bit)
        ] + m.ClockInterface()
    else:
        IO = [
            "push", m.In(m.Bit),
            "pop", m.In(m.Bit),
            "start", m.In(m.Bit),
            "rst", m.In(m.Reset),
            "data_in", m.In(m.Bits(DATAWID)),
            "data_out_vld", m.Out(m.Bit)
        ] + m.ClockInterface()
    if NUM_REQS is None:
        NUM_REQS = 1

    @classmethod
    def definition(io):
        """Instantiate and wire the enable register, tracker, FIFOs, arbiter."""
        en = DefineRegister(1, init=0, has_ce=False, has_reset=True,
                            _type=m.Bits)(name="en")
        # wire clock: definition first, instance second, as in every other
        # wireclock call in this design
        m.wireclock(io, en)
        # wire reset
        m.wire(io.rst, en.RESET)
        # enable only goes high once, then stays high
        m.wire(en.O | m.bits(io.start), en.I)

        mpt = DefineMagicPacketTracker(DEPTH)()
        # wire up magic packet tracker
        m.wire(en.O, m.bits(mpt.captured))
        m.wire(io.rst, mpt.rst)
        m.wireclock(io, mpt)

        if not ARBITER:
            m.wire(io.push, mpt.push)
            m.wire(io.pop, mpt.pop)

            fifo = DefineFIFO(DATAWID, DEPTH)()
            # wire up fifo
            m.wire(io.push, fifo.push)
            m.wire(io.pop, fifo.pop)
            m.wire(io.rst, fifo.rst)
            m.wire(io.data_in, fifo.data_in)
            m.wireclock(io, fifo)
        else:
            # The tracker follows requester 0 only.
            m.wire(io.push[0], mpt.push)

            fifos = list()
            for i in range(NUM_REQS):
                f = DefineFIFO(DATAWID, DEPTH)(name="fifo_{}".format(i))
                fifos.append(f)
                m.wire(io.push[i], f.push)
                m.wire(io.rst, f.rst)
                m.wire(io.data_in[i], f.data_in)
                m.wireclock(io, f)

        # Need to wire things up
        if ARBITER:
            arb = DefineDWRR(NUM_REQS, QWID, DATAWID)(name="arb")
            m.wire(io.rst, arb.rst)
            m.wire(io.input_quantums, arb.quantums)
            for i in range(NUM_REQS):
                # A non-empty FIFO requests service; a grant pops it.
                m.wire(~fifos[i].empty, arb.reqs[i])
                m.wire(arb.gnt[i], fifos[i].pop)
            m.wire(arb.gnt[0], mpt.pop)

        # vld out
        # TODO handle missing magic packet -- need to reset everything.
        # Or keep as an assumption/restriction
        m.wire(
            m.bit(en.O)
            & eq(m.uint(mpt.next_cnt), m.uint(0, (DEPTH - 1).bit_length()))
            & eq(m.uint(mpt.cnt), m.uint(1, (DEPTH - 1).bit_length())),
            io.data_out_vld)
class FIFO(m.Circuit):
    """Register-based FIFO with push/pop strobes and full/empty flags.

    Read and write pointers are `PTRWID` bits wide. The low `PTRWID - 1`
    bits select one of the `DEPTH` entry registers; the top bit is a wrap
    bit used to tell "full" apart from "empty".
    """
    name = "FIFO"
    IO = [
        "push", m.In(m.Bit),
        "pop", m.In(m.Bit),
        "rst", m.In(m.Reset),
        "data_in", m.In(m.UInt(WIDTH)),
        "data_out", m.Out(m.UInt(WIDTH)),
        "empty", m.Out(m.Bit),
        "full", m.Out(m.Bit)
    ] + m.ClockInterface()

    @classmethod
    def definition(io):
        """Build the pointers, the flags, the entry registers and the mux."""
        # A combined clock-enable (push | pop | rst) did not work with
        # coreir because of the rst/bit conversion, so each pointer has its
        # own enable instead.

        # ------------------------- pointer logic -------------------------
        ptrreg = DefineRegister(PTRWID, init=0, has_ce=True, has_reset=True,
                                _type=m.UInt)
        wrPtr = ptrreg(name="wrPtr")
        rdPtr = ptrreg(name="rdPtr")

        # wire clocks
        m.wireclock(io, wrPtr)
        m.wireclock(io, rdPtr)

        # wire resets
        m.wire(wrPtr.RESET, io.rst)
        m.wire(rdPtr.RESET, io.rst)

        # wire enables
        m.wire(wrPtr.CE, io.push)
        m.wire(rdPtr.CE, io.pop)

        # next values increment by one
        m.wire(wrPtr.I, wrPtr.O + m.uint(1, PTRWID))
        m.wire(rdPtr.I, rdPtr.O + m.uint(1, PTRWID))

        # The pointers count up by one, so bit 0 is the fastest-changing
        # bit. The entry address therefore has to be the LOW bits and the
        # wrap bit the TOP bit; using bit 0 as the wrap bit would assert
        # `full` after a single push and write every entry twice.
        ADDRWID = PTRWID - 1
        wrAddr = wrPtr.O[0:ADDRWID]
        rdAddr = rdPtr.O[0:ADDRWID]
        wrWrap = wrPtr.O[ADDRWID]
        rdWrap = rdPtr.O[ADDRWID]

        # --------------------- full and empty logic ----------------------
        m.wire(io.empty, wrPtr.O == rdPtr.O)
        m.wire(io.full, (wrAddr == rdAddr) & (wrWrap != rdWrap))

        # -------------------------- entry logic --------------------------
        # Create and write
        entries = []
        entryReg = DefineRegister(WIDTH, init=0, has_ce=True,
                                  has_reset=False, _type=m.Bits)
        for i in range(DEPTH):
            entry = entryReg(name="entry" + str(i))
            # Only the entry addressed by the write pointer loads on a push.
            m.wire(
                entry.CE,
                io.push & m.bit(m.uint(wrAddr) == m.uint(i, ADDRWID)))
            m.wire(entry.I, io.data_in)
            entries.append(entry)

        # Connect mux
        outmux = Mux(DEPTH, WIDTH)
        for i in range(DEPTH):
            m.wire(getattr(outmux, "I" + str(i)), entries[i].O)

        m.wire(rdAddr, outmux.S)
        m.wire(outmux.O, io.data_out)