def get_hash_mask(sample_size: SampleSize) -> (m.Bits(4)):
    """Pick the 4-bit hash mask associated with ``sample_size``.

    Each mask is a run of ``m.bit(1)`` values followed by a run of
    ``m.bit(0)`` values, listed here in ``m.concat`` argument order:

    * ``SampleSize.ONE_PIXEL``     -> four 0 bits
    * ``SampleSize.HALF_PIXEL``    -> one 1 bit, three 0 bits
    * ``SampleSize.QUARTER_PIXEL`` -> two 1 bits, two 0 bits
    * any other value              -> three 1 bits, one 0 bit
    """
    one = m.bit(1)
    zero = m.bit(0)

    if sample_size == SampleSize.ONE_PIXEL:
        mask = m.repeat(zero, 4)
    elif sample_size == SampleSize.HALF_PIXEL:
        mask = m.concat(one, m.repeat(zero, 3))
    elif sample_size == SampleSize.QUARTER_PIXEL:
        mask = m.concat(m.repeat(one, 2), m.repeat(zero, 2))
    else:
        # Every remaining sample size shares this mask.
        mask = m.concat(m.repeat(one, 3), zero)
    return mask
def writeport(addr_width, width, regs, WADDR, I, WE):
    """Hook up the write side of ``1 << addr_width`` registers.

    ``WADDR`` is fed to a ``Decoder(addr_width)`` instance and ``WE`` is
    replicated once per register; both buses drive an ``And(2, n)``
    instance.  Each register in ``regs`` is then called with ``I`` as its
    input and the matching ``And`` output bit, converted with ``m.enable``,
    as its ``CE``.

    ``width`` is accepted for signature compatibility but is not used here.
    """
    num_regs = 1 << addr_width
    addr_decoder = Decoder(addr_width)
    gate = And(2, num_regs)

    # Build the two gate operands explicitly before wiring them in.
    select_lines = addr_decoder(WADDR)
    we_copies = repeat(WE, num_regs)
    gate(select_lines, we_copies)

    for index in range(num_regs):
        clock_enable = m.enable(gate.O[index])
        regs[index](I, CE=clock_enable)
def writeport(height, width, regs, WADDR, I, WE):
    """Connect the write port of a bank of ``1 << height`` registers.

    The write address goes through a ``Decoder(height)`` instance; its
    output and ``WE`` repeated once per register drive an ``And(2, n)``
    instance.  Register ``i`` in ``regs`` is called with ``I`` as input and
    bit ``i`` of the ``And`` output as ``CE``.

    ``width`` is part of the signature but is not referenced in the body.
    """
    depth = 1 << height
    row_select = Decoder(height)
    write_gate = And(2, depth)
    write_gate(row_select(WADDR), repeat(WE, depth))

    # Fetch the gated enable bus once, then hand one bit to each register.
    gated_enables = write_gate.O
    for row in range(depth):
        regs[row](I, CE=gated_enables[row])
def NastiWriteDataChannel(nasti_params, data, strb=None, last=True, id=0):
    """Create a NASTI write-data channel value and drive all of its fields.

    Args:
        nasti_params: Parameters forwarded to
            ``make_NastiWriteDataChannel``; ``w_strobe_bits`` is read when
            no strobe is supplied.
        data: Value wired to the ``data`` field.
        strb: Value wired to the ``strb`` field.  When ``None`` it defaults
            to ``m.repeat(1, nasti_params.w_strobe_bits)``.
        last: Value wired to the ``last`` field.
        id: Value wired to the ``id`` field.

    Returns:
        The freshly instantiated channel, with ``user`` wired to 0.
    """
    channel = make_NastiWriteDataChannel(nasti_params)()
    strobe = (m.repeat(1, nasti_params.w_strobe_bits)
              if strb is None else strb)

    channel.strb @= strobe
    channel.data @= data
    channel.last @= last
    channel.id @= id
    channel.user @= 0
    return channel
def get_hash_mask(sample_size: SampleSize) -> (m.Bits(8)):
    """Pick the 8-bit hash mask associated with ``sample_size``.

    Each mask is a run of ``m.bit(1)`` values followed by a run of
    ``m.bit(0)`` values, listed here in ``m.concat`` argument order:

    * ``SampleSize.ONE_PIXEL``     -> eight 1 bits
    * ``SampleSize.HALF_PIXEL``    -> seven 1 bits, one 0 bit
    * ``SampleSize.QUARTER_PIXEL`` -> six 1 bits, two 0 bits
    * any other value              -> five 1 bits, three 0 bits
    """
    one = m.bit(1)
    zero = m.bit(0)

    if sample_size == SampleSize.ONE_PIXEL:
        mask = m.repeat(one, 8)
    elif sample_size == SampleSize.HALF_PIXEL:
        mask = m.concat(m.repeat(one, 7), m.repeat(zero, 1))
    elif sample_size == SampleSize.QUARTER_PIXEL:
        mask = m.concat(m.repeat(one, 6), m.repeat(zero, 2))
    else:
        # Catch-all branch; the original author annotated it as the
        # SampleSize.EIGHTH_PIXEL case.
        mask = m.concat(m.repeat(one, 5), m.repeat(zero, 3))
    return mask
def definition(io):
    """Wire up the bounding-box logic and the output pipeline registers.

    The body (1) compares the coordinates of vertices 0-2 of ``io.poly_in``
    and asks ``io.return_ll_ur`` for a lower-left / upper-right pair per
    axis, (2) builds ``box_init``, ``rounded_box`` and ``box_clamped`` from
    those values, ``hash_mask`` and ``io.screen_max``, (3) derives
    ``box_valid`` from ``io.valid_in`` and the range comparisons, and
    (4) registers polygon, colour, box, valid and is_quad through two DFF
    instances each.

    ``bits``, ``fractional_bits``, ``axes``, ``vertices``, ``color_channels``,
    ``pipe_depth``, ``Polygon``, ``mux`` and ``dff`` are not defined here and
    must come from the enclosing scope.
    """
    # -------------------
    # Your code goes here
    # -------------------
    # You may define any combinational functions you may need
    # Finally, assign values to
    #   box_clamped
    #   box_valid
    # These signals feed into the pipeline registers

    # Three pairwise `<=` results (v0<=v1, v1<=v2, v0<=v2) on index [..][0]
    # of each vertex -- presumably the x coordinate, going by the names.
    # NOTE(review): only vertices 0-2 are compared; a fourth (quad) vertex
    # is not considered here -- confirm that is intended.
    x_comp = m.concat(\
        m.bits(io.poly_in[0][0]) <= m.bits(io.poly_in[1][0]), \
        m.bits(io.poly_in[1][0]) <= m.bits(io.poly_in[2][0]), \
        m.bits(io.poly_in[0][0]) <= m.bits(io.poly_in[2][0]))
    # Same three comparisons on index [..][1] (presumably y).
    y_comp = m.concat(\
        m.bits(io.poly_in[0][1]) <= m.bits(io.poly_in[1][1]), \
        m.bits(io.poly_in[1][1]) <= m.bits(io.poly_in[2][1]), \
        m.bits(io.poly_in[0][1]) <= m.bits(io.poly_in[2][1]))

    # The second argument distinguishes the two calls (bit 0 for the x
    # comparisons, bit 1 for the y comparisons).
    (ll_x, ur_x) = io.return_ll_ur(x_comp, m.bit(0))
    (ll_y, ur_y) = io.return_ll_ur(y_comp, m.bit(1))

    (hash_mask) = io.get_hash_mask(io.sample_size)

    # Three 2x2 boxes; indexing below is [corner][axis] with corner 0 = ll
    # and corner 1 = ur, as established by the assignments that follow.
    box_init = Polygon(2, 2, bits)
    rounded_box = Polygon(2, 2, bits)
    box_clamped = Polygon(2, 2, bits)

    box_init[0][0] = ll_x
    box_init[1][0] = ur_x
    box_init[0][1] = ll_y
    box_init[1][1] = ur_y

    # Upper (non-fractional) bits go straight from box_init to rounded_box.
    # NOTE(review): Python slices are half-open, so `[a:bits - 1]` stops
    # before index bits - 1 -- confirm the top bit is meant to be left out.
    m.wire(box_init[0][0][fractional_bits:bits - 1],
           rounded_box[0][0][fractional_bits:bits - 1])
    m.wire(box_init[0][1][fractional_bits:bits - 1],
           rounded_box[0][1][fractional_bits:bits - 1])
    m.wire(box_init[1][0][fractional_bits:bits - 1],
           rounded_box[1][0][fractional_bits:bits - 1])
    m.wire(box_init[1][1][fractional_bits:bits - 1],
           rounded_box[1][1][fractional_bits:bits - 1])

    # Fractional bits: six constant-zero bits concatenated with the slice
    # [6:fractional_bits - 1] ANDed with hash_mask.
    # NOTE(review): the masked value is taken from rounded_box while the
    # other wire operand is box_init -- confirm the intended direction, and
    # that the slice width matches the width of hash_mask.
    m.wire(
        box_init[0][0][0:fractional_bits - 1],
        m.concat(
            m.repeat(m.bit(0), 6),
            (rounded_box[0][0][6:fractional_bits - 1] & hash_mask)))
    m.wire(
        box_init[0][1][0:fractional_bits - 1],
        m.concat(
            m.repeat(m.bit(0), 6),
            (rounded_box[0][1][6:fractional_bits - 1] & hash_mask)))
    m.wire(
        box_init[1][0][0:fractional_bits - 1],
        m.concat(
            m.repeat(m.bit(0), 6),
            (rounded_box[1][0][6:fractional_bits - 1] & hash_mask)))
    m.wire(
        box_init[1][1][0:fractional_bits - 1],
        m.concat(
            m.repeat(m.bit(0), 6),
            (rounded_box[1][1][6:fractional_bits - 1] & hash_mask)))

    # Clamp: lower-left coordinates are muxed against all-zero when the
    # `< 0` comparison fires, upper-right coordinates against
    # io.screen_max when they exceed it.
    # NOTE(review): `< 0` is only meaningful for a signed type -- confirm
    # the element type of rounded_box.
    box_clamped[0][0] = mux(rounded_box[0][0], m.repeat(m.bit(0), bits),
                            (rounded_box[0][0] < 0))
    box_clamped[1][0] = mux(rounded_box[1][0], io.screen_max[0],
                            (rounded_box[1][0] > io.screen_max[0]))
    box_clamped[0][1] = mux(rounded_box[0][1], m.repeat(m.bit(0), bits),
                            (rounded_box[0][1] < 0))
    box_clamped[1][1] = mux(rounded_box[1][1], io.screen_max[1],
                            (rounded_box[1][1] > io.screen_max[1]))

    # Valid only if the input was valid and none of the four range
    # comparisons used for clamping fired.
    box_valid = io.valid_in & ~(
        (rounded_box[0][0] < 0) | (rounded_box[1][0] > io.screen_max[0]) |
        (rounded_box[0][1] < 0) | (rounded_box[1][1] > io.screen_max[1]))

    # -------------------
    # Your code goes here
    # -------------------
    # Put values into pipeline registers
    def wire_reg(reg, reg_input, reg_output=None):
        # Connect one register instance: data input, clock, reset and
        # enable (driven from io.halt[0]); the data output is wired only
        # when a destination is supplied.
        m.wire(reg_input, reg.data_in)
        m.wire(reg.clk, io.CLK)
        m.wire(reg.reset, io.RESET)
        m.wire(reg.en, io.halt[0])
        if reg_output is not None:
            m.wire(reg.data_out, reg_output)

    # Every signal goes through a "retime" register built with
    # (pipe_depth - 1, 1) followed by a final register built with (1, 0).
    # NOTE(review): the meaning of those last two DefineDFF* arguments is
    # not visible here -- presumably pipeline depth and a retiming flag.
    poly_retime_r = dff.DefineDFF3(axes, vertices, bits, pipe_depth - 1, 1)()
    wire_reg(poly_retime_r, io.poly_in)
    poly_r = dff.DefineDFF3(axes, vertices, bits, 1, 0)()
    wire_reg(poly_r, poly_retime_r.data_out, io.poly_out)

    color_retime_r = dff.DefineDFF2(color_channels, bits, pipe_depth - 1, 1)()
    wire_reg(color_retime_r, io.color_in)
    color_r = dff.DefineDFF2(color_channels, bits, 1, 0)()
    wire_reg(color_r, color_retime_r.data_out, io.color_out)

    box_retime_r = dff.DefineDFF3(2, 2, bits, pipe_depth - 1, 1)()
    wire_reg(box_retime_r, box_clamped)
    box_r = dff.DefineDFF3(2, 2, bits, 1, 0)()
    wire_reg(box_r, box_retime_r.data_out, io.box)

    valid_retime_r = dff.DefineDFF(1, pipe_depth - 1, 1)()
    wire_reg(valid_retime_r, box_valid)
    valid_r = dff.DefineDFF(1, 1, 0)()
    wire_reg(valid_r, valid_retime_r.data_out, io.valid_out)

    # is_quad is converted with m.bits on both ends before wiring.
    is_quad_retime_r = dff.DefineDFF(1, pipe_depth - 1, 1)()
    wire_reg(is_quad_retime_r, m.bits(io.is_quad_in))
    is_quad_r = dff.DefineDFF(1, 1, 0)()
    wire_reg(is_quad_r, is_quad_retime_r.data_out, m.bits(io.is_quad_out))
def __init__(self, x_len, n_ways: int, n_sets: int, b_bytes: int):
    """Elaborate the cache: storage, CPU response path, NASTI channels, FSM.

    Args:
        x_len: Word/address width in bits.  It sizes the CPU-side words
            (``w_bytes = x_len // 8``), bounds the tag slice of the
            address and is passed as ``addr_bits`` to ``NastiParameters``.
        n_ways: Not referenced anywhere in this constructor.
        n_sets: Number of entries in the valid/dirty bit vectors, the
            metadata memory and each per-word data memory.
        b_bytes: Size of one cache block in bytes.

    Raises:
        AssertionError: If a block is narrower than one NASTI data beat
            (``data_beats`` would be 0).
    """
    # Derived geometry.
    b_bits = b_bytes << 3                          # block size in bits
    b_len = m.bitutils.clog2(b_bytes)              # byte-in-block addr bits
    s_len = m.bitutils.clog2(n_sets)               # set-index addr bits
    t_len = x_len - (s_len + b_len)                # remaining bits: tag
    n_words = b_bits // x_len                      # x_len-bit words per block
    w_bytes = x_len // 8                           # bytes per word
    byte_offset_bits = m.bitutils.clog2(w_bytes)   # byte-in-word addr bits
    nasti_params = NastiParameters(data_bits=64, addr_bits=x_len,
                                   id_bits=5)
    data_beats = b_bits // nasti_params.x_data_bits  # NASTI beats per block

    class MetaData(m.Product):
        tag = m.UInt[t_len]

    self.io = m.IO(**make_cache_ports(x_len, nasti_params))
    self.io += m.ClockIO()

    class State(m.Enum):
        IDLE = 0
        READ_CACHE = 1
        WRITE_CACHE = 2
        WRITE_BACK = 3
        WRITE_ACK = 4
        REFILL_READY = 5
        REFILL = 6

    state = m.Register(init=State.IDLE)()

    # memory
    # v / d hold one bit per set (they are indexed by idx_reg below).
    v = m.Register(m.UInt[n_sets], has_enable=True)()
    d = m.Register(m.UInt[n_sets], has_enable=True)()
    meta_mem = m.Memory(n_sets, MetaData, read_latency=1,
                        has_read_enable=True)()
    # One byte-maskable memory per word of the block.
    data_mem = [
        ArrayMaskMem(n_sets, w_bytes, m.UInt[8], read_latency=1,
                     has_read_enable=True)() for _ in range(n_words)
    ]

    # Registered copies of the CPU request fields.
    addr_reg = m.Register(type(self.io.cpu.req.data.addr).undirected_t,
                          has_enable=True)()
    cpu_data = m.Register(type(self.io.cpu.req.data.data).undirected_t,
                          has_enable=True)()
    cpu_mask = m.Register(type(self.io.cpu.req.data.mask).undirected_t,
                          has_enable=True)()

    self.io.nasti.r.ready @= state.O == State.REFILL

    # Counters
    assert data_beats > 0
    if data_beats > 1:
        read_counter = mantle.CounterModM(data_beats,
                                          max(data_beats.bit_length(), 1),
                                          has_ce=True)
        read_counter.CE @= m.enable(self.io.nasti.r.fired())
        read_count, read_wrap_out = read_counter.O, read_counter.COUT

        # write_counter.CE is wired at the very end of this constructor.
        write_counter = mantle.CounterModM(data_beats,
                                           max(data_beats.bit_length(), 1),
                                           has_ce=True)
        write_count, write_wrap_out = write_counter.O, write_counter.COUT
    else:
        # Single beat: no counters; the counts are plain Python ints and
        # the wrap signals are constant 1.
        read_count, read_wrap_out = 0, 1
        write_count, write_wrap_out = 0, 1

    # Buffer that collects the beats arriving on the NASTI read channel.
    refill_buf = m.Register(m.Array[data_beats,
                                    m.UInt[nasti_params.x_data_bits]],
                            has_enable=True)()
    if data_beats == 1:
        refill_buf.I[0] @= self.io.nasti.r.data.data
    else:
        # The counter is bit_length(data_beats) bits wide; [:-1] drops its
        # top bit before it is used as the beat index.
        refill_buf.I @= m.set_index(refill_buf.O,
                                    self.io.nasti.r.data.data,
                                    read_count[:-1])
    refill_buf.CE @= m.enable(self.io.nasti.r.fired())

    is_idle = state.O == State.IDLE
    is_read = state.O == State.READ_CACHE
    is_write = state.O == State.WRITE_CACHE
    is_alloc = (state.O == State.REFILL) & read_wrap_out
    # m.display("[%0t]: is_alloc = %x", m.time(), is_alloc)\
    #     .when(m.posedge(self.io.CLK))
    is_alloc_reg = m.Register(m.Bit)()(is_alloc)

    # `hit` is declared here and driven further down, once rmeta exists.
    hit = m.Bit(name="hit")
    wen = is_write & (hit | is_alloc_reg) & ~self.io.cpu.abort | is_alloc
    # m.display("[%0t]: wen = %x", m.time(), wen)\
    #     .when(m.posedge(self.io.CLK))
    ren = m.enable(~wen & (is_idle | is_read) & self.io.cpu.req.valid)
    ren_reg = m.enable(m.Register(m.Bit)()(ren))

    # Address fields: idx comes from the live request, the *_reg fields
    # from the registered address.
    addr = self.io.cpu.req.data.addr
    idx = addr[b_len:s_len + b_len]
    tag_reg = addr_reg.O[s_len + b_len:x_len]
    idx_reg = addr_reg.O[b_len:s_len + b_len]
    off_reg = addr_reg.O[byte_offset_bits:b_len]

    rmeta = meta_mem.read(idx, ren)
    rdata = m.concat(*(mem.read(idx, ren) for mem in data_mem))
    # Hold the last read data while no new read was issued.
    rdata_buf = m.Register(type(rdata), has_enable=True)()(rdata, CE=ren_reg)

    # Block seen by the CPU / write-back path: memory data (fresh or
    # buffered), or the refill buffer when is_alloc_reg is set.
    read = m.mux([
        m.as_bits(m.mux([rdata_buf, rdata], ren_reg)),
        m.as_bits(refill_buf.O)
    ], is_alloc_reg)
    # m.display("is_alloc_reg=%x", is_alloc_reg)\
    #     .when(m.posedge(self.io.CLK))

    hit @= v.O[idx_reg] & (rmeta.tag == tag_reg)

    # read mux
    self.io.cpu.resp.data.data @= m.array(
        [read[i * x_len:(i + 1) * x_len] for i in range(n_words)])[off_reg]
    self.io.cpu.resp.valid @= (is_idle | (is_read & hit) |
                               (is_alloc_reg & ~cpu_mask.O.reduce_or()))

    # Simulation-time debug output.
    m.display("resp.valid=%x", self.io.cpu.resp.valid.value())\
        .when(m.posedge(self.io.CLK))

    m.display("[%0t]: valid = %x", m.time(),
              self.io.cpu.resp.valid.value())\
        .when(m.posedge(self.io.CLK))

    m.display("[%0t]: is_idle = %x, is_read = %x, hit = %x, is_alloc_reg = "
              "%x, ~cpu_mask.O.reduce_or() = %x", m.time(), is_idle, is_read,
              hit, is_alloc_reg, ~cpu_mask.O.reduce_or())\
        .when(m.posedge(self.io.CLK))

    # One "%x" per refill beat so the format always matches the number of
    # unpacked arguments (identical to the old literal when data_beats == 2).
    refill_fmt = "[%0t]: refill_buf.O=" + ", ".join(["%x"] * data_beats)
    m.display(refill_fmt, m.time(), *refill_buf.O)\
        .when(m.posedge(self.io.CLK))\
        .if_(self.io.cpu.resp.valid.value() & is_alloc_reg)

    m.display("[%0t]: read=%x", m.time(), read)\
        .when(m.posedge(self.io.CLK))\
        .if_(self.io.cpu.resp.valid.value() & is_alloc_reg)

    # Capture a new request whenever a response is valid.
    addr_reg.I @= addr
    addr_reg.CE @= m.enable(self.io.cpu.resp.valid.value())

    cpu_data.I @= self.io.cpu.req.data.data
    cpu_data.CE @= m.enable(self.io.cpu.resp.valid.value())

    cpu_mask.I @= self.io.cpu.req.data.mask
    cpu_mask.CE @= m.enable(self.io.cpu.resp.valid.value())

    wmeta = MetaData(name="wmeta")
    wmeta.tag @= tag_reg

    # CPU byte mask shifted to the word selected by off_reg; on allocation
    # (is_alloc) the mask is all ones instead.
    offset_mask = (m.zext_to(cpu_mask.O, w_bytes * 8) << m.concat(
        m.bits(0, byte_offset_bits), off_reg))
    wmask = m.mux([m.SInt[w_bytes * 8](-1),
                   m.sint(offset_mask)], ~is_alloc)

    # Allocation data: the buffered beats followed by the beat currently on
    # the NASTI read channel (the last buffer slot is not yet written).
    if len(refill_buf.O) == 1:
        wdata_alloc = self.io.nasti.r.data.data
    else:
        wdata_alloc = m.concat(
            # TODO: not sure why they use `init.reverse`
            # https://github.com/ucb-bar/riscv-mini/blob/release/src/main/scala/Cache.scala#L116
            m.concat(*refill_buf.O[:-1]),
            self.io.nasti.r.data.data)
    wdata = m.mux([wdata_alloc,
                   m.as_bits(m.repeat(cpu_data.O, n_words))], ~is_alloc)

    # A write marks the set valid; it is dirty unless it is an allocation.
    v.I @= m.set_index(v.O, m.bit(True), idx_reg)
    v.CE @= m.enable(wen)
    d.I @= m.set_index(d.O, ~is_alloc, idx_reg)
    d.CE @= m.enable(wen)
    # m.display("[%0t]: refill_buf.O = %x", m.time(),
    #           m.concat(*refill_buf.O)).when(m.posedge(self.io.CLK)).if_(wen)
    # m.display("[%0t]: nasti.r.data.data = %x", m.time(),
    #           self.io.nasti.r.data.data).when(m.posedge(self.io.CLK)).if_(wen)

    # The tag is only rewritten on allocation.
    meta_mem.write(wmeta, idx_reg, m.enable(wen & is_alloc))
    for i, mem in enumerate(data_mem):
        # Split word i of wdata into bytes for the byte-masked memory.
        data = [
            wdata[i * x_len + j * 8:i * x_len + (j + 1) * 8]
            for j in range(w_bytes)
        ]
        mem.write(m.array(data), idx_reg,
                  wmask[i * w_bytes:(i + 1) * w_bytes], m.enable(wen))
        # m.display("[%0t]: wdata = %x, %x, %x, %x", m.time(),
        #           *mem.WDATA.value()).when(m.posedge(self.io.CLK)).if_(wen)
        # m.display("[%0t]: wmask = %x, %x, %x, %x", m.time(),
        #           *mem.WMASK.value()).when(m.posedge(self.io.CLK)).if_(wen)

    # Refill address: registered tag + index, shifted left by b_len.
    tag_and_idx = m.zext_to(m.concat(idx_reg, tag_reg),
                            nasti_params.x_addr_bits)
    self.io.nasti.ar.data @= NastiReadAddressChannel(
        nasti_params, 0, tag_and_idx << m.Bits[len(tag_and_idx)](b_len),
        m.bitutils.clog2(nasti_params.x_data_bits // 8), data_beats - 1)

    # Write-back address: uses the tag read from meta_mem instead.
    rmeta_and_idx = m.zext_to(m.concat(idx_reg, rmeta.tag),
                              nasti_params.x_addr_bits)
    self.io.nasti.aw.data @= NastiWriteAddressChannel(
        nasti_params, 0, rmeta_and_idx << m.Bits[len(rmeta_and_idx)](b_len),
        m.bitutils.clog2(nasti_params.x_data_bits // 8), data_beats - 1)

    # With a single beat there is no counter and `write_count` is the plain
    # int 0, which cannot be sliced; select beat 0 directly in that case.
    write_beat_idx = write_count[:-1] if data_beats > 1 else 0
    self.io.nasti.w.data @= NastiWriteDataChannel(
        nasti_params,
        m.array([
            read[i * nasti_params.x_data_bits:(i + 1) *
                 nasti_params.x_data_bits] for i in range(data_beats)
        ])[write_beat_idx], None, write_wrap_out)

    is_dirty = v.O[idx_reg] & d.O[idx_reg]

    # TODO: Have to use temporary so we can invoke `fired()`
    aw_valid = m.Bit(name="aw_valid")
    self.io.nasti.aw.valid @= aw_valid

    ar_valid = m.Bit(name="ar_valid")
    self.io.nasti.ar.valid @= ar_valid

    b_ready = m.Bit(name="b_ready")
    self.io.nasti.b.ready @= b_ready

    @m.inline_combinational()
    def logic():
        # Defaults: hold the state and keep every handshake deasserted.
        state.I @= state.O
        aw_valid @= False
        ar_valid @= False
        self.io.nasti.w.valid @= False
        b_ready @= False
        if state.O == State.IDLE:
            # A non-zero mask marks the request as a write.
            if self.io.cpu.req.valid:
                if self.io.cpu.req.data.mask.reduce_or():
                    state.I @= State.WRITE_CACHE
                else:
                    state.I @= State.READ_CACHE
        elif state.O == State.READ_CACHE:
            if hit:
                if self.io.cpu.req.valid:
                    if self.io.cpu.req.data.mask.reduce_or():
                        state.I @= State.WRITE_CACHE
                    else:
                        state.I @= State.READ_CACHE
                else:
                    state.I @= State.IDLE
            else:
                # Miss: write back first if dirty, otherwise refill.
                aw_valid @= is_dirty
                ar_valid @= ~is_dirty
                if self.io.nasti.aw.fired():
                    state.I @= State.WRITE_BACK
                elif self.io.nasti.ar.fired():
                    state.I @= State.REFILL
        elif state.O == State.WRITE_CACHE:
            if hit | is_alloc_reg | self.io.cpu.abort:
                state.I @= State.IDLE
            else:
                aw_valid @= is_dirty
                ar_valid @= ~is_dirty
                if self.io.nasti.aw.fired():
                    state.I @= State.WRITE_BACK
                elif self.io.nasti.ar.fired():
                    state.I @= State.REFILL
        elif state.O == State.WRITE_BACK:
            self.io.nasti.w.valid @= True
            if write_wrap_out:
                state.I @= State.WRITE_ACK
        elif state.O == State.WRITE_ACK:
            b_ready @= True
            if self.io.nasti.b.fired():
                state.I @= State.REFILL_READY
        elif state.O == State.REFILL_READY:
            ar_valid @= True
            if self.io.nasti.ar.fired():
                state.I @= State.REFILL
        elif state.O == State.REFILL:
            # After the last beat, a pending write (non-zero registered
            # mask) is replayed through WRITE_CACHE.
            if read_wrap_out:
                if cpu_mask.O.reduce_or():
                    state.I @= State.WRITE_CACHE
                else:
                    state.I @= State.IDLE

    if data_beats > 1:
        # TODO: Have to do this at the end since the inline comb logic
        # wires up nasti.w
        write_counter.CE @= m.enable(self.io.nasti.w.fired())