コード例 #1
0
def test_bits():
    """Check that every supported ``bits(...)`` argument form yields a BitsType."""
    # Builders are called one at a time inside the loop so that each value is
    # constructed immediately before it is checked.
    builders = (
        lambda: bits(1, 4),
        lambda: bits([1, 0, 0, 0]),
        lambda: bits(VCC),
        lambda: bits(array(1, 4)),
        lambda: bits(uint(1, 4)),
        lambda: bits(sint(1, 4)),
    )
    for build in builders:
        assert isinstance(build(), BitsType)
コード例 #2
0
ファイル: interface.py プロジェクト: nbenavi/CS448H
 def __getitem__(self, key):
     """Return the argument at integer `key`, or an array of arguments for a slice.

     Integer keys must satisfy ``0 <= key < len(self)``; anything else trips
     the assertion (negative indices are not wrapped).
     """
     if not isinstance(key, slice):
         assert 0 <= key < len(self), "key: %d, self.N: %d" % (
             key, len(self))
         return self.arguments()[key]
     # Expand the slice into concrete indices and fetch each element through
     # the integer path above.
     positions = range(*key.indices(len(self)))
     return array(*[self[pos] for pos in positions])
コード例 #3
0
ファイル: higher.py プロジェクト: nbenavi/CS448H
def flatarg(arg, interfaces):
    """Flatten the values `getarg` returns for `arg` into a single array.

    Returns a two-element list: a ``"<direction> <arg>"`` declaration string
    and an array holding every element of every collected value, in order.
    """
    collected = getarg(arg, interfaces)
    direction = getdirection(collected)
    # Elements are reached through len() and integer subscripting, exactly as
    # the collected values expose them.
    elements = [item[idx] for item in collected for idx in range(len(item))]
    return ['%s %s' % (direction, arg), array(*elements)]
コード例 #4
0
def test_tuple():
    """Check that every supported ``tuple_(...)`` argument form yields a TupleType."""
    # Builders are called one at a time inside the loop so that each value is
    # constructed immediately before it is checked.
    builders = (
        lambda: tuple_(OrderedDict(x=0, y=1)),
        lambda: tuple_([0, 1]),
        lambda: tuple_(VCC),
        lambda: tuple_(array(1, 4)),
        lambda: tuple_(bits(1, 4)),
        lambda: tuple_(sint(1, 4)),
        lambda: tuple_(uint(1, 4)),
    )
    for build in builders:
        assert isinstance(build(), TupleType)
コード例 #5
0
def test_array():
    """Check that every supported ``array(...)`` argument form yields an ArrayType."""
    # Builders are called one at a time inside the loop so that each value is
    # constructed immediately before it is checked.
    builders = (
        lambda: array(1, 4),
        lambda: array([1, 0, 0, 0]),
        lambda: array(VCC),
        lambda: array(array(1, 4)),
        lambda: array(uint(1, 4)),
        lambda: array(sint(1, 4)),
    )
    for build in builders:
        assert isinstance(build(), ArrayType)
コード例 #6
0
ファイル: higher.py プロジェクト: nbenavi/CS448H
def rscanarg(iarg, oarg, interfaces, noiarg=False, nooarg=False):
    """Chain `oarg` of each stage into `iarg` of the preceding stage.

    For consecutive stages, the `oarg` value of stage ``k + 1`` is wired to
    the `iarg` value of stage ``k``.  Returns a flat argument list holding
    ``'input <iarg>'`` with the first input (unless `noiarg`) and
    ``'output <oarg>'`` with an array of all outputs (unless `nooarg`).
    """
    ins = getarg(iarg, interfaces)
    outs = getarg(oarg, interfaces)

    # Walk the stages starting at the second one and connect each back to
    # its predecessor.
    for stage in range(1, len(interfaces)):
        wire(outs[stage], ins[stage - 1])

    result = []
    if not noiarg:
        result.extend(['input %s' % iarg, ins[0]])
    if not nooarg:
        result.extend(['output %s' % oarg, array(*outs)])
    return result
コード例 #7
0
ファイル: higher.py プロジェクト: nbenavi/CS448H
def uncurry(circuit, prefix='I'):
    """Bundle every port whose name starts with `prefix` into one array input.

    The returned AnonymousCircuit exposes ``'input <prefix>'`` bound to an
    array of the matching ports, followed by every other port under its own
    direction and name.  Matching ports are asserted to be inputs.
    """
    bundled = []
    for name, port in circuit.interface.ports.items():
        if not name.startswith(prefix):
            continue
        assert port.direction == INPUT
        bundled.append(port)

    args = ['input %s' % prefix, array(*bundled)]

    # Second pass: carry the remaining ports over unchanged.
    for name, port in circuit.interface.ports.items():
        if name.startswith(prefix):
            continue
        args.extend(['%s %s' % (port.direction, name), port])

    return AnonymousCircuit(args)
コード例 #8
0
def test_print_ir():

    And2 = DeclareCircuit('And2', "I0", In(Bit), "I1", In(Bit), "O", Out(Bit)) 

    AndN2 = DefineCircuit("AndN2", "I", In(Array[2, Bit]), "O", Out(Bit) ) 
    and2 = And2() 
    wire( AndN2.I[0], and2.I0 ) 
    wire( AndN2.I[1], and2.I1 ) 
    wire( and2.O, AndN2.O ) 
    EndCircuit() 

    main = DefineCircuit("main", "I0", In(Bit), "I1", In(Bit), "O", Out(Bit)) 
    and2 = AndN2() 
    main.O( and2(array([main.I0, main.I1])) ) 
    EndCircuit() 

    result = compile(main)
    #print(result)
    assert result == """\
コード例 #9
0
    class _Counter(m.Circuit):
        # Circuit built from an n-bit adder feeding an n-bit register.
        # name_, args, n, cout, has_ce and has_reset come from the enclosing
        # scope, which is not visible here.
        name = name_
        io = m.IO(**args)
        io += m.ClockIO(has_ce, has_reset)

        add = Add(n, cin=True, cout=cout)
        reg = Register(n, has_ce=has_ce, has_reset=has_reset)

        # Adder operands: the current register value, and io.D replicated
        # across all n bits; io.U drives the adder's carry-in.
        m.wire(reg.O, add.I0)
        m.wire(m.array(n * [io.D]), add.I1)
        m.wire(io.U, add.CIN)

        # Presumably wires the adder's output into the register input --
        # TODO confirm against the Register call convention.
        reg(add)

        # Hard-coded switch: with next False the registered value drives
        # io.O; the add.O branch is currently never taken.  Note that `next`
        # also remains as a class attribute.
        next = False
        if next:
            m.wire(add.O, io.O)
        else:
            m.wire(reg.O, io.O)
        if cout:
            m.wire(add.COUT, io.COUT)
コード例 #10
0
    class _Counter(m.Circuit):
        # Circuit built from an n-bit adder feeding an n-bit register.
        # name_, args, n, incr, cin, cout, has_ce and has_reset come from the
        # enclosing scope, which is not visible here.
        name = name_
        io = m.IO(**args)
        io += m.ClockIO(has_ce, has_reset)

        add = DefineAdd(n, cin=cin, cout=cout)()
        reg = Register(n, has_ce=has_ce, has_reset=has_reset)

        # Adder operands: the current register value and the constant `incr`
        # expressed as an n-element array; the sum is fed back to the register.
        m.wire(reg.O, add.I0)
        m.wire(m.array(incr, n), add.I1)
        reg(add.O)

        # Hard-coded switch: with next False the registered value drives
        # io.O; the add.O branch is currently never taken.  Note that `next`
        # also remains as a class attribute.
        next = False
        if next:
            m.wire(add.O, io.O)
        else:
            m.wire(reg.O, io.O)
        # Carry ports are only wired when the corresponding option is set.
        if cin:
            m.wire(io.CIN, add.CIN)
        if cout:
            m.wire(add.COUT, io.COUT)
コード例 #11
0
ファイル: hash_jtree.py プロジェクト: michaellu002/magma_271
        def definition(io):
            """Wire the sample-jitter hash and the output pipeline registers.

            Two tree-hash instances produce jitter values that are OR-ed into
            the two sample coordinates.  The jittered sample and the
            polygon, colour, is_quad and valid_sample inputs are each passed
            through a pair of DFF instances before reaching the matching
            ``io.*_out`` port.

            bits, fractional_bits, integer_bits, axes, vertices,
            color_channels and pipe_depth come from the enclosing scope,
            which is not visible here.
            """
            # Each hash consumes two subsamples of (bits - 4) bits each.
            hash_in_width = (bits - 4) * 2
            hash_out_width = fractional_bits - 2
            (hash_mask) = io.get_hash_mask(io.sample_size)

            jitter = [
                define_tree_hash(hash_in_width, hash_out_width)()
                for _ in range(2)
            ]

            # Bits [4:bits) of each sample coordinate.
            subsample1 = io.sample_in[1][4:bits]
            subsample0 = io.sample_in[0][4:bits]

            # The two hashes see the same subsamples in opposite concat order
            # and share one mask.
            m.wire(jitter[0].data_in, m.bits(m.concat(subsample0, subsample1)))
            m.wire(jitter[0].mask, hash_mask)
            m.wire(jitter[1].data_in, m.bits(m.concat(subsample1, subsample0)))
            m.wire(jitter[1].mask, hash_mask)

            # Jitter the sample coordinates
            # The hash output is placed between zero padding of widths
            # (fractional_bits - hash_out_width) and integer_bits, then OR-ed
            # with the original coordinate.
            sample_jittered = m.array([
                m.bits(io.sample_in[i][0:bits])
                | m.concat(m.bits(0, fractional_bits - hash_out_width),
                           m.bits(jitter[i].data_out[0:hash_out_width]),
                           m.bits(0, integer_bits)) for i in range(2)
            ])

            # Put values into pipeline registers
            def wire_reg(reg, reg_input, reg_output=None):
                # Connect data, clock and reset, tie the enable to constant 1,
                # and optionally wire the register output to `reg_output`.
                m.wire(reg_input, reg.data_in)
                m.wire(reg.clk, io.CLK)
                m.wire(reg.reset, io.RESET)
                m.wire(reg.en, m.bit(1))
                if reg_output is not None:
                    m.wire(reg.data_out, reg_output)

            # Every signal below goes through two register instances: one
            # built with trailing arguments (pipe_depth - 1, 1) and one with
            # (1, 0).  NOTE(review): presumably depth and a retime flag --
            # confirm against the dff module.
            poly_retime_r = dff.DefineDFF3(axes, vertices, bits,
                                           pipe_depth - 1, 1)()
            wire_reg(poly_retime_r, io.poly_in)

            poly_r = dff.DefineDFF3(axes, vertices, bits, 1, 0)()
            wire_reg(poly_r, poly_retime_r.data_out, io.poly_out)

            color_retime_r = dff.DefineDFF2(color_channels, bits,
                                            pipe_depth - 1, 1)()
            wire_reg(color_retime_r, io.color_in)

            color_r = dff.DefineDFF2(color_channels, bits, 1, 0)()
            wire_reg(color_r, color_retime_r.data_out, io.color_out)

            # is_quad is converted with m.bits() on both the input and the
            # output side before wiring.
            is_quad_retime_r = dff.DefineDFF(1, pipe_depth - 1, 1)()
            wire_reg(is_quad_retime_r, m.bits(io.is_quad_in))

            is_quad_r = dff.DefineDFF(1, 1, 0)()
            wire_reg(is_quad_r, is_quad_retime_r.data_out,
                     m.bits(io.is_quad_out))

            valid_sample_retime_r = dff.DefineDFF(1, pipe_depth - 1, 1)()
            wire_reg(valid_sample_retime_r, io.valid_sample_in)

            valid_sample_r = dff.DefineDFF(1, 1, 0)()
            wire_reg(valid_sample_r, valid_sample_retime_r.data_out,
                     io.valid_sample_out)

            # The jittered (not the raw) sample is what enters the pipeline.
            sample_retime_r = dff.DefineDFF2(2, bits, pipe_depth - 1, 1)()
            wire_reg(sample_retime_r, sample_jittered)

            sample_r = dff.DefineDFF2(2, bits, 1, 0)()
            wire_reg(sample_r, sample_retime_r.data_out, io.sample_out)
コード例 #12
0
ファイル: reducenand2.py プロジェクト: splhack/loam
from magma import array, wire, compile, EndCircuit
from mantle import ReduceNAnd
from loam.boards.icestick import IceStick

# Board setup: enable J1[0] and J1[1] as inputs and turn on D5.
icestick = IceStick()
for i in range(2):
    icestick.J1[i].input().on()
icestick.D5.on()

main = icestick.main()

# Feed the two J1 pins to a 2-input ReduceNAnd and drive D5 with its output.
nand2 = ReduceNAnd(2)
nand2(array([main.J1[0], main.J1[1]]))
wire(nand2.O, main.D5)

EndCircuit()
コード例 #13
0
ファイル: add1.py プロジェクト: splhack/loam
from magma import array, wire, compile, EndCircuit
from loam.boards.icestick import IceStick, Add

# Board setup: clock on, J1[0]/J1[1] exposed as inputs A0/B0, D1 enabled.
icestick = IceStick()
icestick.Clock.on()
icestick.J1[0].rename('A0').input().on()
icestick.J1[1].rename('B0').input().on()
icestick.D1.on()

main = icestick.main()
# Wrap each single pin in a one-element array to match the 1-bit adder.
A = array([main.A0])
B = array([main.B0])
O = array([main.D1])

add = Add(1)

# Drive D1 with the value returned by calling the adder on A and B.
wire( add(A, B), O )

EndCircuit()
コード例 #14
0
import magma as m
from magma.bitutils import int2seq
from mantle.util.edge import falling, rising
from mantle import I0, I1, I2, I3
import mantle
from rom import ROM16
from uart import UART

trigger = m.VCC  # maybe tie to GPIO later

# ArduCAM start capture sequence
init = [
    # Change MCU mode
    m.array(int2seq(0x8200, 16)),
    # Start capture
    m.array(int2seq(0x8401, 16)),
    m.array(int2seq(0x8401, 16)),
    m.array(int2seq(0x8402, 16)),
    # check capture completion flag
    m.array(int2seq(0x4100, 16)),
    # Read image length
    m.array(int2seq(0x4200, 16)),
    m.array(int2seq(0x4300, 16)),
    m.array(int2seq(0x4400, 16)),
    # burst read
    m.array(int2seq(0x3CFF, 16)),
    m.array(int2seq(0x0000, 16)),
    m.array(int2seq(0x0000, 16)),
    m.array(int2seq(0x0000, 16)),
    m.array(int2seq(0x0000, 16)),
    m.array(int2seq(0x0000, 16)),
コード例 #15
0
ファイル: romb4.py プロジェクト: splhack/loam
# Board setup: clock on, J1[0..7] exposed as inputs I0..I7 and J3[0..3] as
# outputs D0..D3.  (`icestick` is created above this excerpt.)
icestick.Clock.on()
icestick.J1[0].rename('I0').input().on()
icestick.J1[1].rename('I1').input().on()
icestick.J1[2].rename('I2').input().on()
icestick.J1[3].rename('I3').input().on()
icestick.J1[4].rename('I4').input().on()
icestick.J1[5].rename('I5').input().on()
icestick.J1[6].rename('I6').input().on()
icestick.J1[7].rename('I7').input().on()
icestick.J3[0].rename('D0').output().on()
icestick.J3[1].rename('D1').output().on()
icestick.J3[2].rename('D2').output().on()
icestick.J3[3].rename('D3').output().on()

main = icestick.main()
# Read address: the eight input pins followed by two constant-0 entries
# (10 elements in total).
I = array([main.I0, main.I1, main.I2, main.I3,
          main.I4, main.I5, main.I6, main.I7, 0, 0])
O = array([main.D0, main.D1, main.D2, main.D3])

# ROM contents: M entries, where entry i holds the low 4 bits of i.
N = 4
M = 4096//N
rom = M * [0]
for i in range(M):
    rom[i] = i & 0xf

romb = ROMB( M, N, rom )
#print(romb.interface)

# RE is tied to constant 1; the address comes from I and the read data
# drives O.
wire( 1, romb.RE    )
wire( I, romb.RADDR )
wire( romb.RDATA, O)
コード例 #16
0
ファイル: higher.py プロジェクト: nbenavi/CS448H
def joinarg(arg, interfaces):
    """Join the values `getarg` returns for `arg` into a single array.

    Returns a two-element list: a ``"<direction> <arg>"`` declaration string
    and an array built from the collected values.
    """
    collected = getarg(arg, interfaces)
    label = '%s %s' % (getdirection(collected), arg)
    return [label, array(*collected)]
コード例 #17
0
ファイル: ftdi.py プロジェクト: splhack/loam
import magma as m
from magma.bitutils import int2seq
import mantle
from loam.boards.icestick import IceStick
from rom import ROM

# Board setup: clock on and TX enabled as an output.
icestick = IceStick()
icestick.Clock.on()
icestick.TX.output().on()

main = icestick.main()

valid = 1

# One 8-element bit sequence per character of the 16-character message.
init = [m.array(int2seq(ord(c), 8)) for c in 'hello, world  \r\n']

# A 4-bit counter output is passed to the ROM together with `init`.
printf = mantle.Counter(4, has_ce=True)
rom = ROM(4, init, printf.O)

# ROM output bits in reverse index order (7 down to 0) followed by a
# constant 0: nine elements in total.
data = m.array([rom.O[7], rom.O[6], rom.O[5], rom.O[4],
                rom.O[3], rom.O[2], rom.O[1], rom.O[0], 0])

# NOTE(review): the carry-out of this modulo-103 counter is presumably the
# baud-rate tick -- confirm the clock frequency it divides.
counter = mantle.CounterModM(103, 8)
baud = counter.COUT

# `done` is the Decode(15, 4) result for the 4-bit counter's output.
count = mantle.Counter(4, has_ce=True, has_reset=True)
decode = mantle.Decode(15, 4)
done = decode(count.O)

run = mantle.DFF(has_ce=True)
run_n = mantle.LUT3([0,0,1,0, 1,0,1,0])
コード例 #18
0
from mantle import I0, I1
from uart import UART

# convolution window size
x = 3
y = 3

# image size (height and width)
dim = 16

buf_size = 64

# 3x3 grid of one-element arrays: the first two rows hold 1, the last row
# holds 0.
weights = m.array([
    m.array([m.array([1]), m.array([1]),
             m.array([1])]),
    m.array([m.array([1]), m.array([1]),
             m.array([1])]),
    m.array([m.array([0]), m.array([0]),
             m.array([0])])
])


class Convolution(m.Circuit):
    name = "Rescale"
    IO = [
        'CLK',
        m.In(m.Clock), 'LOAD',
        m.In(m.Bit), 'DATA',
        m.In(m.Bits(width)), 'SCK',
        m.In(m.Bit), 'WADDR',
        m.Out(m.Bits(5)), 'O',
        m.Out(m.Bits(32)), 'VALID',
コード例 #19
0
 def __init__(self):
     """Create `register_array` as 15 entries of 1024-bit zero values."""
     zero_rows = [m.bits(0, 1024) for _ in range(15)]
     self.register_array: m.Array[15, m.Bits[1024]] = m.array(zero_rows)
コード例 #20
0
ファイル: conv_test.py プロジェクト: essox514/magmacam
# Enable J2[8] as an input (`hx8kboard` is created above this excerpt).
hx8kboard.J2[8].input().on()

main = hx8kboard.main()

# convolution window size
x = 3
y = 3

# image size (height and width)
dim = 16

# 3x3 grid of one-element arrays: the first two rows hold 1, the last row
# holds 0.
weights = m.array([
    m.array([m.array([1]), m.array([1]),
             m.array([1])]),
    m.array([m.array([1]), m.array([1]),
             m.array([1])]),
    m.array([m.array([0]), m.array([0]),
             m.array([0])])
])

# Generate the SCLK signal (12 MHz/32 = 375 kHz)
# NOTE(review): Counter(2) is a 2-bit counter, so its top bit presumably
# toggles at CLKIN/4 rather than /32 -- confirm the intended divider.
clk_counter = mantle.Counter(2)
sclk = clk_counter.O[-1]

# Initialize Modules

# ArduCAM
# The camera gets the board clock, the divided clock as SCK, and J2_8 as MISO.
cam = ArduCAM()
m.wire(main.CLKIN, cam.CLK)
m.wire(sclk, cam.SCK)
m.wire(main.J2_8, cam.MISO)
コード例 #21
0
from magma import array, wire, compile, EndCircuit
from loam.boards.icestick import IceStick, Mux

# Board setup: J1[0..2] exposed as inputs I0, I1 and S; D1 enabled.
icestick = IceStick()
icestick.J1[0].rename('I0').input().on()
icestick.J1[1].rename('I1').input().on()
icestick.J1[2].rename('S').input().on()
icestick.D1.on()

main = icestick.main()
# Data inputs bundled into a 2-element array; S is passed as the select.
I = array([main.I0, main.I1])
S = main.S

# Call the 2-input mux with the data array and select, then drive D1 with
# its output.
mux = Mux(2)
mux(I, S)
wire(mux.O, main.D1)

EndCircuit()
コード例 #22
0
from magma import array, wire, compile, EndCircuit
from loam.boards.icestick import IceStick, Negate

# Board setup: J1[0..1] exposed as inputs I0/I1; D1 and D2 enabled.
icestick = IceStick()
icestick.J1[0].rename('I0').input().on()
icestick.J1[1].rename('I1').input().on()
icestick.D1.on()
icestick.D2.on()

main = icestick.main()
# Two-element input and output arrays matching the 2-bit Negate instance.
I = array([main.I0, main.I1])
O = array([main.D1, main.D2])

neg = Negate(2)

# Drive D1/D2 with the value returned by calling Negate on the inputs.
wire(neg(I), O)

EndCircuit()
コード例 #23
0
ファイル: romb2.py プロジェクト: splhack/loam
# Board setup: clock on, J1[0..7] exposed as inputs I0..I7 and J3[0..1] as
# outputs D0..D1.
icestick = IceStick()
icestick.Clock.on()
icestick.J1[0].rename('I0').input().on()
icestick.J1[1].rename('I1').input().on()
icestick.J1[2].rename('I2').input().on()
icestick.J1[3].rename('I3').input().on()
icestick.J1[4].rename('I4').input().on()
icestick.J1[5].rename('I5').input().on()
icestick.J1[6].rename('I6').input().on()
icestick.J1[7].rename('I7').input().on()
icestick.J3[0].rename('D0').output().on()
icestick.J3[1].rename('D1').output().on()

main = icestick.main()
# Read address: only I0..I3 are used, followed by seven constant-0 entries
# (11 elements in total); I4..I7 are enabled above but not wired here.
I = array([main.I0, main.I1, main.I2, main.I3, 0, 0, 0, 0, 0, 0, 0])
O = array([main.D0, main.D1])

# ROM contents: M entries, where entry i holds the low 2 bits of i.
N = 2
M = 4096 // N
rom = M * [0]
for i in range(M):
    rom[i] = i & 0x3

romb = ROMB(M, N, rom)
#print(romb.interface)

# RE is tied to constant 1; the address comes from I and the read data
# drives O.
wire(1, romb.RE)
wire(I, romb.RADDR)
wire(romb.RDATA, O)
コード例 #24
0
ファイル: romb8.py プロジェクト: splhack/loam
# Board setup (continues from above this excerpt): J1[4..7] exposed as
# inputs I4..I7 and J3[0..7] as outputs D0..D7.
icestick.J1[4].rename('I4').input().on()
icestick.J1[5].rename('I5').input().on()
icestick.J1[6].rename('I6').input().on()
icestick.J1[7].rename('I7').input().on()
icestick.J3[0].rename('D0').output().on()
icestick.J3[1].rename('D1').output().on()
icestick.J3[2].rename('D2').output().on()
icestick.J3[3].rename('D3').output().on()
icestick.J3[4].rename('D4').output().on()
icestick.J3[5].rename('D5').output().on()
icestick.J3[6].rename('D6').output().on()
icestick.J3[7].rename('D7').output().on()

main = icestick.main()
# Read address: the eight input pins followed by one constant-0 entry
# (9 elements in total).
I = array([
    main.I0, main.I1, main.I2, main.I3, main.I4, main.I5, main.I6, main.I7, 0
])
O = array(
    [main.D0, main.D1, main.D2, main.D3, main.D4, main.D5, main.D6, main.D7])

# ROM contents: M entries, where entry i holds the low 8 bits of i.
N = 8
M = 4096 // N
rom = M * [0]
for i in range(M):
    rom[i] = i & 0xff

romb = ROMB(M, N, rom)
#print(romb.interface)

# RE is tied to constant 1 and the address comes from I.
wire(1, romb.RE)
wire(I, romb.RADDR)
コード例 #25
0
ファイル: adc2.py プロジェクト: splhack/loam
from magma import array, wire, compile, EndCircuit
from loam.boards.icestick import IceStick, Add

# Board setup: clock on; J1[0..4] exposed as inputs A0, A1, B0, B1 and CIN;
# D1..D3 enabled.
icestick = IceStick()
icestick.Clock.on()
icestick.J1[0].rename('A0').input().on()
icestick.J1[1].rename('A1').input().on()
icestick.J1[2].rename('B0').input().on()
icestick.J1[3].rename('B1').input().on()
icestick.J1[4].rename('CIN').input().on()
icestick.D1.on()
icestick.D2.on()
icestick.D3.on()

main = icestick.main()
A = array([main.A0, main.A1])
B = array([main.B0, main.B1])
# NOTE(review): O is built but not referenced below; the outputs are wired
# bit by bit instead.
O = array([main.D1, main.D2])

# 2-bit adder created with two extra True arguments; it is called with a
# carry input and its COUT is wired out below.
add = Add(2, True, True)

add(A, B, main.CIN)

# Sum bits go to D1/D2 and the carry-out to D3.
wire(add.O[0], main.D1)
wire(add.O[1], main.D2)
wire(add.COUT, main.D3)

EndCircuit()
コード例 #26
0
def test_sint():
    """Check that every supported ``sint(...)`` argument form yields an SIntType."""
    from_int = sint(1, 4)
    assert isinstance(from_int, SIntType)

    from_list = sint([1, 0, 0, 0])
    assert isinstance(from_list, SIntType)

    from_vcc = sint(VCC)
    assert isinstance(from_vcc, SIntType)

    from_array = sint(array(1, 4))
    assert isinstance(from_array, SIntType)

    from_bits = sint(bits(1, 4))
    assert isinstance(from_bits, SIntType)
コード例 #27
0
from magma import array, wire, compile, EndCircuit
from loam.boards.icestick import IceStick, NE

# Board setup: J1[0..3] exposed as inputs I0..I3; D1 enabled.
icestick = IceStick()
icestick.J1[0].rename('I0').input().on()
icestick.J1[1].rename('I1').input().on()
icestick.J1[2].rename('I2').input().on()
icestick.J1[3].rename('I3').input().on()
icestick.D1.on()

main = icestick.main()
# First operand: the four input pins.  Second operand: the constant
# sequence [0, 1, 0, 1].
I0 = array([main.I0, main.I1, main.I2, main.I3])
I1 = array([0,1,0,1])

# Call the 4-bit NE instance with both operands and drive D1 with its output.
ne4 = NE(4)
ne4(I0,I1)
wire(ne4.O, main.D1)

EndCircuit()
コード例 #28
0
ファイル: higher.py プロジェクト: nbenavi/CS448H
def inputs(circuit):
    """Return the circuit's sole input, or an array of all inputs otherwise."""
    ports = circuit.interface.inputs()
    return ports[0] if len(ports) == 1 else array(*ports)
コード例 #29
0
from mantle.lattice.ice40.RAMB import RAMB

# Board setup: clock on, J1[0..6] exposed as inputs I0..I6 and J3[0..1] as
# outputs D0..D1.
icestick = IceStick()
icestick.Clock.on()
icestick.J1[0].rename('I0').input().on()
icestick.J1[1].rename('I1').input().on()
icestick.J1[2].rename('I2').input().on()
icestick.J1[3].rename('I3').input().on()
icestick.J1[4].rename('I4').input().on()
icestick.J1[5].rename('I5').input().on()
icestick.J1[6].rename('I6').input().on()
icestick.J3[0].rename('D0').output().on()
icestick.J3[1].rename('D1').output().on()

main = icestick.main()
# Pin roles: I0/I1 form the write data, I2/I3 and I4/I5 the low entries of
# the write and read addresses (each followed by nine constant-0 entries,
# 11 elements in total), and I6 the write enable.
WDATA = array([main.I0, main.I1])
WADDR = array([main.I2, main.I3, 0, 0, 0, 0, 0, 0, 0, 0, 0])
RADDR = array([main.I4, main.I5, 0, 0, 0, 0, 0, 0, 0, 0, 0])
WE = main.I6
O = array([main.D0, main.D1])

# Contents passed to RAMB: M entries, where entry i holds the low 2 bits
# of i.
N = 2
M = 4096 // N
rom = M * [0]
for i in range(M):
    rom[i] = i & 0x3

ramb = RAMB(M, N, rom)
#print(ramb.interface)

wire(WE, ramb.WE)
コード例 #30
0
    def __init__(self, x_len, n_ways: int, n_sets: int, b_bytes: int):
        """Build the cache circuit: storage, CPU response path and NASTI FSM.

        Args:
            x_len: word / address width in bits.
            n_ways: accepted but not referenced anywhere in this body.
            n_sets: number of sets (entries in the valid/dirty/meta/data
                storage).
            b_bytes: block size in bytes.

        The body creates the IO (cache ports plus clock), the valid and
        dirty bit registers, tag and data memories, a refill buffer, the
        CPU-side read/write datapath, the NASTI address/data channel drivers
        and a seven-state controller.
        """
        # Derived sizes: bits per block, byte-offset / set-index / tag field
        # widths of an address, words per block and bytes per word.
        b_bits = b_bytes << 3
        b_len = m.bitutils.clog2(b_bytes)
        s_len = m.bitutils.clog2(n_sets)
        t_len = x_len - (s_len + b_len)
        n_words = b_bits // x_len
        w_bytes = x_len // 8
        byte_offset_bits = m.bitutils.clog2(w_bytes)
        nasti_params = NastiParameters(data_bits=64,
                                       addr_bits=x_len,
                                       id_bits=5)
        # Number of bus transfers needed to move one block.
        data_beats = b_bits // nasti_params.x_data_bits

        class MetaData(m.Product):
            tag = m.UInt[t_len]

        self.io = m.IO(**make_cache_ports(x_len, nasti_params))
        self.io += m.ClockIO()

        class State(m.Enum):
            IDLE = 0
            READ_CACHE = 1
            WRITE_CACHE = 2
            WRITE_BACK = 3
            WRITE_ACK = 4
            REFILL_READY = 5
            REFILL = 6

        state = m.Register(init=State.IDLE)()

        # memory
        # v and d hold one bit per set; they are later combined as
        # "v & d" to form is_dirty.
        v = m.Register(m.UInt[n_sets], has_enable=True)()
        d = m.Register(m.UInt[n_sets], has_enable=True)()
        meta_mem = m.Memory(n_sets,
                            MetaData,
                            read_latency=1,
                            has_read_enable=True)()
        # One byte-masked memory per word of a block.
        data_mem = [
            ArrayMaskMem(n_sets,
                         w_bytes,
                         m.UInt[8],
                         read_latency=1,
                         has_read_enable=True)() for _ in range(n_words)
        ]

        # Registers capturing the CPU request's address, data and mask.
        addr_reg = m.Register(type(self.io.cpu.req.data.addr).undirected_t,
                              has_enable=True)()
        cpu_data = m.Register(type(self.io.cpu.req.data.data).undirected_t,
                              has_enable=True)()
        cpu_mask = m.Register(type(self.io.cpu.req.data.mask).undirected_t,
                              has_enable=True)()

        self.io.nasti.r.ready @= state.O == State.REFILL
        # Counters
        assert data_beats > 0
        if data_beats > 1:
            read_counter = mantle.CounterModM(data_beats,
                                              max(data_beats.bit_length(), 1),
                                              has_ce=True)
            read_counter.CE @= m.enable(self.io.nasti.r.fired())
            read_count, read_wrap_out = read_counter.O, read_counter.COUT

            write_counter = mantle.CounterModM(data_beats,
                                               max(data_beats.bit_length(), 1),
                                               has_ce=True)
            write_count, write_wrap_out = write_counter.O, write_counter.COUT
        else:
            # Single-beat transfers: constants stand in for the counters.
            read_count, read_wrap_out = 0, 1
            write_count, write_wrap_out = 0, 1

        refill_buf = m.Register(m.Array[data_beats,
                                        m.UInt[nasti_params.x_data_bits]],
                                has_enable=True)()
        if data_beats == 1:
            refill_buf.I[0] @= self.io.nasti.r.data.data
        else:
            # The counter's top bit is dropped when indexing.
            # NOTE(review): presumably because bit_length() gives one bit
            # more than the index needs -- confirm for non-power-of-two
            # beat counts.
            refill_buf.I @= m.set_index(refill_buf.O,
                                        self.io.nasti.r.data.data,
                                        read_count[:-1])
        refill_buf.CE @= m.enable(self.io.nasti.r.fired())

        is_idle = state.O == State.IDLE
        is_read = state.O == State.READ_CACHE
        is_write = state.O == State.WRITE_CACHE
        is_alloc = (state.O == State.REFILL) & read_wrap_out
        # m.display("[%0t]: is_alloc = %x", m.time(), is_alloc)\
        #     .when(m.posedge(self.io.CLK))
        is_alloc_reg = m.Register(m.Bit)()(is_alloc)

        # `hit` is declared here and driven further down, once rmeta and
        # tag_reg exist.
        hit = m.Bit(name="hit")
        wen = is_write & (hit | is_alloc_reg) & ~self.io.cpu.abort | is_alloc
        # m.display("[%0t]: wen = %x", m.time(), wen)\
        #     .when(m.posedge(self.io.CLK))
        ren = m.enable(~wen & (is_idle | is_read) & self.io.cpu.req.valid)
        ren_reg = m.enable(m.Register(m.Bit)()(ren))

        # Address fields: set index from the live request; tag, index and
        # word offset from the registered request.
        addr = self.io.cpu.req.data.addr
        idx = addr[b_len:s_len + b_len]
        tag_reg = addr_reg.O[s_len + b_len:x_len]
        idx_reg = addr_reg.O[b_len:s_len + b_len]
        off_reg = addr_reg.O[byte_offset_bits:b_len]

        rmeta = meta_mem.read(idx, ren)
        rdata = m.concat(*(mem.read(idx, ren) for mem in data_mem))
        rdata_buf = m.Register(type(rdata), has_enable=True)()(rdata,
                                                               CE=ren_reg)

        # Block presented to the CPU: the refill buffer when is_alloc_reg is
        # set, otherwise the memory read data or its buffered copy
        # (selected by ren_reg).
        read = m.mux([
            m.as_bits(m.mux([rdata_buf, rdata], ren_reg)),
            m.as_bits(refill_buf.O)
        ], is_alloc_reg)
        # m.display("is_alloc_reg=%x", is_alloc_reg)\
        #     .when(m.posedge(self.io.CLK))

        hit @= v.O[idx_reg] & (rmeta.tag == tag_reg)

        # read mux
        self.io.cpu.resp.data.data @= m.array(
            [read[i * x_len:(i + 1) * x_len] for i in range(n_words)])[off_reg]
        self.io.cpu.resp.valid @= (is_idle | (is_read & hit) |
                                   (is_alloc_reg & ~cpu_mask.O.reduce_or()))
        # NOTE(review): the m.display calls below are active (not commented
        # out) and look like debug tracing -- confirm they are intended.
        m.display("resp.valid=%x", self.io.cpu.resp.valid.value())\
            .when(m.posedge(self.io.CLK))
        m.display("[%0t]: valid = %x", m.time(),
                  self.io.cpu.resp.valid.value())\
            .when(m.posedge(self.io.CLK))
        m.display("[%0t]: is_idle = %x, is_read = %x, hit = %x, is_alloc_reg = "
                  "%x, ~cpu_mask.O.reduce_or() = %x", m.time(), is_idle,
                  is_read, hit, is_alloc_reg, ~cpu_mask.O.reduce_or())\
            .when(m.posedge(self.io.CLK))
        m.display("[%0t]: refill_buf.O=%x, %x", m.time(), *refill_buf.O)\
            .when(m.posedge(self.io.CLK))\
            .if_(self.io.cpu.resp.valid.value() & is_alloc_reg)
        m.display("[%0t]: read=%x", m.time(), read)\
            .when(m.posedge(self.io.CLK))\
            .if_(self.io.cpu.resp.valid.value() & is_alloc_reg)

        # The request registers are enabled by resp.valid.
        addr_reg.I @= addr
        addr_reg.CE @= m.enable(self.io.cpu.resp.valid.value())

        cpu_data.I @= self.io.cpu.req.data.data
        cpu_data.CE @= m.enable(self.io.cpu.resp.valid.value())

        cpu_mask.I @= self.io.cpu.req.data.mask
        cpu_mask.CE @= m.enable(self.io.cpu.resp.valid.value())

        wmeta = MetaData(name="wmeta")
        wmeta.tag @= tag_reg

        # Write mask: all ones when allocating, otherwise the CPU mask
        # shifted by the word offset.
        offset_mask = (m.zext_to(cpu_mask.O, w_bytes * 8) << m.concat(
            m.bits(0, byte_offset_bits), off_reg))
        wmask = m.mux([m.SInt[w_bytes * 8](-1),
                       m.sint(offset_mask)], ~is_alloc)

        if len(refill_buf.O) == 1:
            wdata_alloc = self.io.nasti.r.data.data
        else:
            wdata_alloc = m.concat(
                # TODO: not sure why they use `init.reverse`
                # https://github.com/ucb-bar/riscv-mini/blob/release/src/main/scala/Cache.scala#L116
                m.concat(*refill_buf.O[:-1]),
                self.io.nasti.r.data.data)
        # Write data: the refilled block when allocating, otherwise the CPU
        # word repeated across the block.
        wdata = m.mux([wdata_alloc,
                       m.as_bits(m.repeat(cpu_data.O, n_words))], ~is_alloc)

        # On a write, the indexed v bit is set and the indexed d bit is set
        # to ~is_alloc.
        v.I @= m.set_index(v.O, m.bit(True), idx_reg)
        v.CE @= m.enable(wen)
        d.I @= m.set_index(d.O, ~is_alloc, idx_reg)
        d.CE @= m.enable(wen)
        # m.display("[%0t]: refill_buf.O = %x", m.time(),
        #           m.concat(*refill_buf.O)).when(m.posedge(self.io.CLK)).if_(wen)
        # m.display("[%0t]: nasti.r.data.data = %x", m.time(),
        #           self.io.nasti.r.data.data).when(m.posedge(self.io.CLK)).if_(wen)

        meta_mem.write(wmeta, idx_reg, m.enable(wen & is_alloc))
        for i, mem in enumerate(data_mem):
            # Split word i of the write data into bytes for the masked memory.
            data = [
                wdata[i * x_len + j * 8:i * x_len + (j + 1) * 8]
                for j in range(w_bytes)
            ]
            mem.write(m.array(data), idx_reg,
                      wmask[i * w_bytes:(i + 1) * w_bytes], m.enable(wen))
            # m.display("[%0t]: wdata = %x, %x, %x, %x", m.time(),
            #           *mem.WDATA.value()).when(m.posedge(self.io.CLK)).if_(wen)
            # m.display("[%0t]: wmask = %x, %x, %x, %x", m.time(),
            #           *mem.WMASK.value()).when(m.posedge(self.io.CLK)).if_(wen)

        # Read address for the refill: index and registered tag, shifted
        # left by the block-offset width.
        tag_and_idx = m.zext_to(m.concat(idx_reg, tag_reg),
                                nasti_params.x_addr_bits)
        self.io.nasti.ar.data @= NastiReadAddressChannel(
            nasti_params, 0, tag_and_idx << m.Bits[len(tag_and_idx)](b_len),
            m.bitutils.clog2(nasti_params.x_data_bits // 8), data_beats - 1)

        # Write-back address uses the tag read from meta memory instead.
        rmeta_and_idx = m.zext_to(m.concat(idx_reg, rmeta.tag),
                                  nasti_params.x_addr_bits)
        self.io.nasti.aw.data @= NastiWriteAddressChannel(
            nasti_params, 0,
            rmeta_and_idx << m.Bits[len(rmeta_and_idx)](b_len),
            m.bitutils.clog2(nasti_params.x_data_bits // 8), data_beats - 1)

        # Write data channel: the beat of `read` selected by the write
        # counter (top bit dropped).
        self.io.nasti.w.data @= NastiWriteDataChannel(
            nasti_params,
            m.array([
                read[i * nasti_params.x_data_bits:(i + 1) *
                     nasti_params.x_data_bits] for i in range(data_beats)
            ])[write_count[:-1]], None, write_wrap_out)

        is_dirty = v.O[idx_reg] & d.O[idx_reg]

        # TODO: Have to use temporary so we can invoke `fired()`
        aw_valid = m.Bit(name="aw_valid")
        self.io.nasti.aw.valid @= aw_valid

        ar_valid = m.Bit(name="ar_valid")
        self.io.nasti.ar.valid @= ar_valid

        b_ready = m.Bit(name="b_ready")
        self.io.nasti.b.ready @= b_ready

        @m.inline_combinational()
        def logic():
            # Defaults: hold the current state and deassert all handshake
            # signals; the branches below override them per state.
            state.I @= state.O
            aw_valid @= False
            ar_valid @= False
            self.io.nasti.w.valid @= False
            b_ready @= False
            if state.O == State.IDLE:
                # A request with any mask bit set goes to WRITE_CACHE,
                # otherwise to READ_CACHE.
                if self.io.cpu.req.valid:
                    if self.io.cpu.req.data.mask.reduce_or():
                        state.I @= State.WRITE_CACHE
                    else:
                        state.I @= State.READ_CACHE
            elif state.O == State.READ_CACHE:
                if hit:
                    if self.io.cpu.req.valid:
                        if self.io.cpu.req.data.mask.reduce_or():
                            state.I @= State.WRITE_CACHE
                        else:
                            state.I @= State.READ_CACHE
                    else:
                        state.I @= State.IDLE
                else:
                    # Miss: request a write-back when dirty, otherwise a
                    # refill read.
                    aw_valid @= is_dirty
                    ar_valid @= ~is_dirty
                    if self.io.nasti.aw.fired():
                        state.I @= State.WRITE_BACK
                    elif self.io.nasti.ar.fired():
                        state.I @= State.REFILL
            elif state.O == State.WRITE_CACHE:
                if hit | is_alloc_reg | self.io.cpu.abort:
                    state.I @= State.IDLE
                else:
                    aw_valid @= is_dirty
                    ar_valid @= ~is_dirty
                    if self.io.nasti.aw.fired():
                        state.I @= State.WRITE_BACK
                    elif self.io.nasti.ar.fired():
                        state.I @= State.REFILL
            elif state.O == State.WRITE_BACK:
                self.io.nasti.w.valid @= True
                if write_wrap_out:
                    state.I @= State.WRITE_ACK
            elif state.O == State.WRITE_ACK:
                b_ready @= True
                if self.io.nasti.b.fired():
                    state.I @= State.REFILL_READY
            elif state.O == State.REFILL_READY:
                ar_valid @= True
                if self.io.nasti.ar.fired():
                    state.I @= State.REFILL
            elif state.O == State.REFILL:
                # Once the read counter wraps, a pending write (non-zero
                # registered mask) continues in WRITE_CACHE.
                if read_wrap_out:
                    if cpu_mask.O.reduce_or():
                        state.I @= State.WRITE_CACHE
                    else:
                        state.I @= State.IDLE

        if data_beats > 1:
            # TODO: Have to do this at the end since the inline comb logic
            # wires up nasti.w
            write_counter.CE @= m.enable(self.io.nasti.w.fired())