def ICache(clk_i,
           rst_i,
           cpu,
           mem,
           invalidate,
           ENABLE=True,
           D_WIDTH=32,
           BLOCK_WIDTH=5,
           SET_WIDTH=9,
           WAYS=2,
           LIMIT_WIDTH=32):
    """
    The Instruction Cache module.

    Set-associative, read-only cache placed between a CPU Wishbone port and a
    memory Wishbone port. Tags (plus a valid bit) and the LRU history are kept
    in dual-port RAMs: one port is used for lookups/updates, the other one is
    used to clear entries while flushing.

    :param clk_i:       System clock
    :param rst_i:       System reset
    :param cpu:         CPU slave interface (Wishbone Interconnect to master port)
    :param mem:         Memory master interface (Wishbone Interconnect to slave port)
    :param invalidate:  Enable flush cache
    :param ENABLE:      Build the cache. If False, the cpu port is connected
                        to the mem port and no cache is generated
    :param D_WIDTH:     Data width (only 32 is supported)
    :param BLOCK_WIDTH: Address width for byte access inside a block line
    :param SET_WIDTH:   Address width for line access inside a block
    :param WAYS:        Number of ways for associative cache (power of 2, minimum: 2)
    :param LIMIT_WIDTH: Maximum width for address
    :return:            The MyHDL instances collected with instances()
    """
    # NOTE(review): this file contains another definition of ICache; the one
    # that appears last in the file is the one bound to the name.
    if ENABLE:
        assert D_WIDTH == 32, "Error: Unsupported D_WIDTH. Supported values: {32}"
        assert BLOCK_WIDTH > 0, "Error: BLOCK_WIDTH must be a value > 0"
        assert SET_WIDTH > 0, "Error: SET_WIDTH must be a value > 0"
        # The power-of-2 test below also accepts 0 and 1. For those values
        # TAG_LRU_WIDTH is 0, which would ask for a zero-width LRU vector.
        assert WAYS >= 2, "Error: WAYS must be a value >= 2"
        assert not (WAYS & (WAYS - 1)), "Error: WAYS must be a power of 2"

        # --------------------------------------------------------------------------
        WAY_WIDTH        = BLOCK_WIDTH + SET_WIDTH  # cache mem_wbm address width
        TAG_WIDTH        = LIMIT_WIDTH - WAY_WIDTH  # tag size
        TAGMEM_WAY_WIDTH = TAG_WIDTH + 1            # Add the valid bit
        TAGMEM_WAY_VALID = TAGMEM_WAY_WIDTH - 1     # Valid bit index
        TAG_LRU_WIDTH    = (WAYS * (WAYS - 1)) >> 1  # (N*(N-1))/2
        # --------------------------------------------------------------------------
        ic_states = enum('IDLE',
                         'READ',
                         'FETCH',
                         'FLUSH',
                         'FLUSH_LAST')

        cpu_wbs   = WishboneSlave(cpu)
        mem_wbm   = WishboneMaster(mem)
        cpu_busy  = Signal(False)
        cpu_err   = Signal(False)
        cpu_wait  = Signal(False)
        mem_read  = Signal(False)
        mem_write = Signal(False)
        mem_rmw   = Signal(False)

        # Tag memories (one per way): lookup/update port and flush port.
        tag_rw_port        = [RAMIOPort(A_WIDTH=SET_WIDTH, D_WIDTH=TAGMEM_WAY_WIDTH) for i in range(WAYS)]
        tag_flush_port     = [RAMIOPort(A_WIDTH=SET_WIDTH, D_WIDTH=TAGMEM_WAY_WIDTH) for i in range(WAYS)]
        # LRU history memory: lookup/update port and flush port.
        tag_lru_rw_port    = RAMIOPort(A_WIDTH=SET_WIDTH, D_WIDTH=TAG_LRU_WIDTH)
        tag_lru_flush_port = RAMIOPort(A_WIDTH=SET_WIDTH, D_WIDTH=TAG_LRU_WIDTH)
        # Data memories (one per way), word addressed: CPU read port and refill port.
        cache_read_port    = [RAMIOPort(A_WIDTH=WAY_WIDTH - 2, D_WIDTH=D_WIDTH) for _ in range(0, WAYS)]
        cache_update_port  = [RAMIOPort(A_WIDTH=WAY_WIDTH - 2, D_WIDTH=D_WIDTH) for _ in range(0, WAYS)]
        data_cache         = [cache_read_port[i].data_o for i in range(0, WAYS)]

        state              = Signal(ic_states.IDLE)
        n_state            = Signal(ic_states.IDLE)

        busy               = Signal(False)

        miss               = Signal(False)
        miss_w             = Signal(modbv(0)[WAYS:])
        miss_w_and         = Signal(False)
        final_fetch        = Signal(False)
        final_flush        = Signal(False)

        lru_select         = Signal(modbv(0)[WAYS:])
        current_lru        = Signal(modbv(0)[TAG_LRU_WIDTH:])
        update_lru         = Signal(modbv(0)[TAG_LRU_WIDTH:])
        access_lru         = Signal(modbv(0)[WAYS:])
        lru_pre            = Signal(modbv(0)[WAYS:])

        tag_in             = [Signal(modbv(0)[TAGMEM_WAY_WIDTH:]) for _ in range(0, WAYS)]
        tag_out            = [Signal(modbv(0)[TAGMEM_WAY_WIDTH:]) for _ in range(0, WAYS)]
        lru_in             = Signal(modbv(0)[TAG_LRU_WIDTH:])
        lru_out            = Signal(modbv(0)[TAG_LRU_WIDTH:])
        tag_we             = Signal(False)

        # Refill address is a word address (the two byte-offset bits are dropped).
        refill_addr        = Signal(modbv(0)[LIMIT_WIDTH - 2:])
        refill_valid       = Signal(False)
        n_refill_addr      = Signal(modbv(0)[LIMIT_WIDTH - 2:])
        n_refill_valid     = Signal(False)

        flush_addr         = Signal(modbv(0)[SET_WIDTH:])
        flush_we           = Signal(False)
        n_flush_addr       = Signal(modbv(0)[SET_WIDTH:])
        n_flush_we         = Signal(False)

        @always_comb
        def assignments():
            # Last word of the line: word offset is all ones and the memory
            # acknowledges an active cycle.
            final_fetch.next = (refill_addr[BLOCK_WIDTH - 2:] == modbv(-1)[BLOCK_WIDTH - 2:]) and mem_wbm.ack_i and mem_wbm.stb_o and mem_wbm.cyc_o
            lru_select.next  = lru_pre
            current_lru.next = lru_out
            access_lru.next  = ~miss_w
            busy.next        = state != ic_states.IDLE
            # The flush counter counts down; set 0 is the last one.
            final_flush.next = flush_addr == 0

        @always_comb
        def miss_check():
            """
            For each way, check tag and valid flag, and reduce the vector using AND.
            If the vector is full of ones, the data is not in the cache: assert the miss flag.

            MISS: data not in cache and the memory operation is a valid read. Ignore this if
            the module is flushing data.
            """
            value = modbv(0)[WAYS:]
            for i in range(0, WAYS):
                value[i] = (not tag_out[i][TAGMEM_WAY_VALID] or tag_out[i][TAG_WIDTH:0] != cpu_wbs.addr_i[LIMIT_WIDTH:WAY_WIDTH])
            miss_w.next = value

        @always_comb
        def miss_check_2():
            """
            Vector reduce: check for full miss.
            """
            value = True
            for i in range(0, WAYS):
                value = value and miss_w[i]
            miss_w_and.next = value

        @always_comb
        def miss_check_3():
            """
            Check for valid wishbone cycle, and full miss.
            """
            valid_read = cpu_wbs.cyc_i and cpu_wbs.stb_i and not cpu_wbs.we_i
            miss.next  = miss_w_and and valid_read and not invalidate

        # Plain lists of signals, used inside the generators below.
        trwp_clk    = [tag_rw_port[i].clk for i in range(WAYS)]
        trwp_addr   = [tag_rw_port[i].addr for i in range(WAYS)]
        trwp_data_i = [tag_rw_port[i].data_i for i in range(WAYS)]
        trwp_data_o = [tag_rw_port[i].data_o for i in range(WAYS)]
        trwp_we     = [tag_rw_port[i].we for i in range(WAYS)]

        @always_comb
        def tag_rport():
            # Tag memories: indexed with the set field of the CPU address.
            for i in range(WAYS):
                trwp_clk[i].next    = clk_i
                trwp_addr[i].next   = cpu_wbs.addr_i[WAY_WIDTH:BLOCK_WIDTH]
                trwp_data_i[i].next = tag_in[i]
                trwp_we[i].next     = tag_we
                tag_out[i].next     = trwp_data_o[i]
            # LRU memory
            tag_lru_rw_port.clk.next    = clk_i
            tag_lru_rw_port.data_i.next = lru_in
            lru_out.next                = tag_lru_rw_port.data_o
            tag_lru_rw_port.addr.next   = cpu_wbs.addr_i[WAY_WIDTH:BLOCK_WIDTH]
            tag_lru_rw_port.we.next     = tag_we

        @always_comb
        def next_state_logic():
            n_state.next = state
            if state == ic_states.IDLE:
                if invalidate:
                    # cache flush
                    n_state.next = ic_states.FLUSH
                elif cpu_wbs.cyc_i and not cpu_wbs.we_i:
                    # read request: check the tags
                    n_state.next = ic_states.READ
            elif state == ic_states.READ:
                if not miss:
                    # hit: nothing else to do
                    n_state.next = ic_states.IDLE
                else:
                    # miss: refill line
                    n_state.next = ic_states.FETCH
            elif state == ic_states.FETCH:
                # fetch a line from memory
                if final_fetch:
                    n_state.next = ic_states.IDLE
            elif state == ic_states.FLUSH:
                # invalidate tag memory
                if final_flush:
                    n_state.next = ic_states.FLUSH_LAST
                else:
                    n_state.next = ic_states.FLUSH
            elif state == ic_states.FLUSH_LAST:
                # last cycle for flush
                n_state.next = ic_states.IDLE

        @always(clk_i.posedge)
        def update_state():
            if rst_i:
                # Reset enters FLUSH, so the tag memory is cleared after reset.
                state.next = ic_states.FLUSH
            else:
                state.next = n_state

        @always_comb
        def fetch_fsm():
            n_refill_addr.next  = refill_addr
            n_refill_valid.next = False  # refill_valid

            if state == ic_states.IDLE:
                if invalidate:
                    n_refill_valid.next = False
            elif state == ic_states.READ:
                if miss:
                    # Start at the first word of the missing line.
                    n_refill_addr.next  = concat(cpu_wbs.addr_i[LIMIT_WIDTH:BLOCK_WIDTH], modbv(0)[BLOCK_WIDTH - 2:])
                    n_refill_valid.next = True  # not mem_wbm.ready?
            elif state == ic_states.FETCH:
                n_refill_valid.next = True
                if refill_valid and mem_wbm.ack_i:
                    if final_fetch:
                        n_refill_valid.next = False
                        n_refill_addr.next  = 0
                    else:
                        n_refill_valid.next = True
                        n_refill_addr.next  = refill_addr + modbv(1)[BLOCK_WIDTH - 2:]

        @always(clk_i.posedge)
        def update_fetch():
            if rst_i:
                refill_addr.next  = 0
                refill_valid.next = False
            else:
                refill_addr.next  = n_refill_addr
                refill_valid.next = n_refill_valid

        @always_comb
        def tag_write():
            # Default: write back what was read, no write enable.
            for i in range(0, WAYS):
                tag_in[i].next = tag_out[i]
            tag_we.next = False
            lru_in.next = lru_out

            if state == ic_states.IDLE:
                if invalidate:
                    tag_we.next = False
            elif state == ic_states.READ:
                if miss:
                    # Replace the tag of the way chosen by the LRU unit and mark it valid.
                    for i in range(0, WAYS):
                        if lru_select[i]:
                            tag_in[i].next = concat(True, cpu_wbs.addr_i[LIMIT_WIDTH:WAY_WIDTH])
                            tag_we.next    = True
                else:
                    # Hit: store the updated LRU history.
                    lru_in.next = update_lru
                    tag_we.next = True

        @always_comb
        def flush_next_state():
            n_flush_we.next   = False
            n_flush_addr.next = flush_addr

            if state == ic_states.IDLE:
                if invalidate:
                    # Start from the last set and count down.
                    n_flush_addr.next = modbv(-1)[SET_WIDTH:]
                    n_flush_we.next   = True
            elif state == ic_states.FLUSH:
                n_flush_addr.next = flush_addr - modbv(1)[SET_WIDTH:]
                n_flush_we.next   = True
            elif state == ic_states.FLUSH_LAST:
                n_flush_we.next = False

        @always(clk_i.posedge)
        def update_flush():
            if rst_i:
                flush_addr.next = modbv(-1)[SET_WIDTH:]
                flush_we.next   = False
            else:
                flush_addr.next = n_flush_addr
                flush_we.next   = n_flush_we

        tfp_clk    = [tag_flush_port[i].clk for i in range(WAYS)]
        tfp_addr   = [tag_flush_port[i].addr for i in range(WAYS)]
        tfp_data_i = [tag_flush_port[i].data_i for i in range(WAYS)]
        tfp_we     = [tag_flush_port[i].we for i in range(WAYS)]

        @always_comb
        def tag_flush_port_assign():
            # The flush port only writes zeros (valid bit cleared).
            for i in range(WAYS):
                tfp_clk[i].next    = clk_i
                tfp_addr[i].next   = flush_addr
                tfp_data_i[i].next = modbv(0)[TAGMEM_WAY_WIDTH:]
                tfp_we[i].next     = flush_we
            # connect to the LRU memory
            tag_lru_flush_port.clk.next    = clk_i
            tag_lru_flush_port.addr.next   = flush_addr
            tag_lru_flush_port.data_i.next = modbv(0)[TAG_LRU_WIDTH:]
            tag_lru_flush_port.we.next     = flush_we

        @always_comb
        def cpu_data_assign():
            # cpu data_in assignment: instruction.
            # Way 0 is the default; a way that hits overrides it.
            temp = data_cache[0]
            for i in range(0, WAYS):
                if not miss_w[i]:
                    temp = data_cache[i]
            cpu_wbs.dat_o.next = temp

        @always_comb
        def mem_port_assign():
            # Word address -> byte address.
            mem_wbm.addr_o.next = concat(refill_addr, modbv(0)[2:])
            mem_wbm.dat_o.next  = cpu_wbs.dat_i
            mem_wbm.sel_o.next  = modbv(0)[4:]

        # To Verilog
        crp_clk    = [cache_read_port[i].clk for i in range(0, WAYS)]
        crp_addr   = [cache_read_port[i].addr for i in range(0, WAYS)]
        crp_data_i = [cache_read_port[i].data_i for i in range(0, WAYS)]
        crp_we     = [cache_read_port[i].we for i in range(0, WAYS)]

        @always_comb
        def cache_mem_r():
            # Read-only port: write enable is tied low, data_i is a dummy value.
            for i in range(0, WAYS):
                crp_clk[i].next    = clk_i
                crp_addr[i].next   = cpu_wbs.addr_i[WAY_WIDTH:2]
                crp_data_i[i].next = 0xAABBCCDD
                crp_we[i].next     = False

        # To Verilog
        cup_clk    = [cache_update_port[i].clk for i in range(0, WAYS)]
        cup_addr   = [cache_update_port[i].addr for i in range(0, WAYS)]
        cup_data_i = [cache_update_port[i].data_i for i in range(0, WAYS)]
        cup_we     = [cache_update_port[i].we for i in range(0, WAYS)]

        @always_comb
        def cache_mem_update():
            for i in range(0, WAYS):
                # ignore data_o from update port
                cup_clk[i].next    = clk_i
                cup_addr[i].next   = refill_addr[WAY_WIDTH - 2:]
                cup_data_i[i].next = mem_wbm.dat_i
                cup_we[i].next     = lru_select[i] & mem_wbm.ack_i

        @always_comb
        def wbs_cpu_flags():
            cpu_err.next  = mem_wbm.err_i
            cpu_wait.next = miss_w_and or state != ic_states.READ
            cpu_busy.next = busy

        @always_comb
        def wbm_mem_flags():
            mem_read.next  = refill_valid and not final_fetch
            mem_write.next = False
            mem_rmw.next   = False

        # Remove warnings: Signal is driven but not read
        for i in range(WAYS):
            cache_update_port[i].data_o = None
            tag_flush_port[i].data_o    = None
        tag_lru_flush_port.data_o = None

        # Generate the wishbone interfaces
        wbs_cpu = WishboneSlaveGenerator(clk_i, rst_i, cpu_wbs, cpu_busy, cpu_err, cpu_wait).gen_wbs()  # noqa
        wbm_mem = WishboneMasterGenerator(clk_i, rst_i, mem_wbm, mem_read, mem_write, mem_rmw).gen_wbm()  # noqa

        # Instantiate tag memories
        tag_mem = [RAM_DP(tag_rw_port[i], tag_flush_port[i], A_WIDTH=SET_WIDTH, D_WIDTH=TAGMEM_WAY_WIDTH) for i in range(WAYS)]  # noqa
        tag_lru = RAM_DP(tag_lru_rw_port, tag_lru_flush_port, A_WIDTH=SET_WIDTH, D_WIDTH=TAG_LRU_WIDTH)  # noqa

        # instantiate main memory (cache)
        cache_mem = [RAM_DP(cache_read_port[i], cache_update_port[i], A_WIDTH=WAY_WIDTH - 2, D_WIDTH=D_WIDTH) for i in range(0, WAYS)]  # noqa

        # LRU unit.
        lru_m = CacheLRU(current_lru, access_lru, update_lru, lru_pre, None, NUMWAYS=WAYS)  # noqa

        return instances()
    else:
        # Cache disabled: connect the cpu port to the mem port.
        @always_comb
        def rtl():
            mem.addr.next  = cpu.addr
            mem.dat_o.next = cpu.dat_o
            mem.sel.next   = cpu.sel
            mem.we.next    = cpu.we
            cpu.dat_i.next = mem.dat_i
            cpu.ack.next   = mem.ack
            cpu.err.next   = mem.err

        @always(clk_i.posedge)
        def classic_cycle():
            # cyc/stb are registered and dropped on the cycle after an ack.
            mem.cyc.next = cpu.cyc if not mem.ack else False
            mem.stb.next = cpu.stb if not mem.ack else False

        return instances()
def ICache(clk_i,
           rst_i,
           cpu,
           mem,
           invalidate,
           ENABLE=True,
           D_WIDTH=32,
           BLOCK_WIDTH=5,
           SET_WIDTH=9,
           WAYS=2,
           LIMIT_WIDTH=32):
    """
    The Instruction Cache module.

    Set-associative, read-only cache placed between a CPU Wishbone port and a
    memory Wishbone port. Tags (plus a valid bit) and the LRU history are kept
    in dual-port RAMs: one port is used for lookups/updates, the other one is
    used to clear entries while flushing.

    :param clk_i:       System clock
    :param rst_i:       System reset
    :param cpu:         CPU slave interface (Wishbone Interconnect to master port)
    :param mem:         Memory master interface (Wishbone Interconnect to slave port)
    :param invalidate:  Enable flush cache
    :param ENABLE:      Build the cache. If False, the cpu port is connected
                        to the mem port and no cache is generated
    :param D_WIDTH:     Data width (only 32 is supported)
    :param BLOCK_WIDTH: Address width for byte access inside a block line
    :param SET_WIDTH:   Address width for line access inside a block
    :param WAYS:        Number of ways for associative cache (power of 2, minimum: 2)
    :param LIMIT_WIDTH: Maximum width for address
    :return:            The MyHDL instances collected with instances()
    """
    # NOTE(review): this file contains another definition of ICache; the one
    # that appears last in the file is the one bound to the name.
    if ENABLE:
        assert D_WIDTH == 32, "Error: Unsupported D_WIDTH. Supported values: {32}"
        assert BLOCK_WIDTH > 0, "Error: BLOCK_WIDTH must be a value > 0"
        assert SET_WIDTH > 0, "Error: SET_WIDTH must be a value > 0"
        # The power-of-2 test below also accepts 0 and 1. For those values
        # TAG_LRU_WIDTH is 0, which would ask for a zero-width LRU vector.
        assert WAYS >= 2, "Error: WAYS must be a value >= 2"
        assert not (WAYS & (WAYS - 1)), "Error: WAYS must be a power of 2"

        # --------------------------------------------------------------------------
        WAY_WIDTH        = BLOCK_WIDTH + SET_WIDTH  # cache mem_wbm address width
        TAG_WIDTH        = LIMIT_WIDTH - WAY_WIDTH  # tag size
        TAGMEM_WAY_WIDTH = TAG_WIDTH + 1            # Add the valid bit
        TAGMEM_WAY_VALID = TAGMEM_WAY_WIDTH - 1     # Valid bit index
        TAG_LRU_WIDTH    = (WAYS * (WAYS - 1)) >> 1  # (N*(N-1))/2
        # --------------------------------------------------------------------------
        ic_states = enum('IDLE',
                         'READ',
                         'FETCH',
                         'FLUSH',
                         'FLUSH_LAST')

        cpu_wbs   = WishboneSlave(cpu)
        mem_wbm   = WishboneMaster(mem)
        cpu_busy  = Signal(False)
        cpu_err   = Signal(False)
        cpu_wait  = Signal(False)
        mem_read  = Signal(False)
        mem_write = Signal(False)
        mem_rmw   = Signal(False)

        # Tag memories (one per way): lookup/update port and flush port.
        tag_rw_port        = [RAMIOPort(A_WIDTH=SET_WIDTH, D_WIDTH=TAGMEM_WAY_WIDTH) for i in range(WAYS)]
        tag_flush_port     = [RAMIOPort(A_WIDTH=SET_WIDTH, D_WIDTH=TAGMEM_WAY_WIDTH) for i in range(WAYS)]
        # LRU history memory: lookup/update port and flush port.
        tag_lru_rw_port    = RAMIOPort(A_WIDTH=SET_WIDTH, D_WIDTH=TAG_LRU_WIDTH)
        tag_lru_flush_port = RAMIOPort(A_WIDTH=SET_WIDTH, D_WIDTH=TAG_LRU_WIDTH)
        # Data memories (one per way), word addressed: CPU read port and refill port.
        cache_read_port    = [RAMIOPort(A_WIDTH=WAY_WIDTH - 2, D_WIDTH=D_WIDTH) for _ in range(0, WAYS)]
        cache_update_port  = [RAMIOPort(A_WIDTH=WAY_WIDTH - 2, D_WIDTH=D_WIDTH) for _ in range(0, WAYS)]
        data_cache         = [cache_read_port[i].data_o for i in range(0, WAYS)]

        state              = Signal(ic_states.IDLE)
        n_state            = Signal(ic_states.IDLE)

        busy               = Signal(False)

        miss               = Signal(False)
        miss_w             = Signal(modbv(0)[WAYS:])
        miss_w_and         = Signal(False)
        final_fetch        = Signal(False)
        final_flush        = Signal(False)

        lru_select         = Signal(modbv(0)[WAYS:])
        current_lru        = Signal(modbv(0)[TAG_LRU_WIDTH:])
        update_lru         = Signal(modbv(0)[TAG_LRU_WIDTH:])
        access_lru         = Signal(modbv(0)[WAYS:])
        lru_pre            = Signal(modbv(0)[WAYS:])

        tag_in             = [Signal(modbv(0)[TAGMEM_WAY_WIDTH:]) for _ in range(0, WAYS)]
        tag_out            = [Signal(modbv(0)[TAGMEM_WAY_WIDTH:]) for _ in range(0, WAYS)]
        lru_in             = Signal(modbv(0)[TAG_LRU_WIDTH:])
        lru_out            = Signal(modbv(0)[TAG_LRU_WIDTH:])
        tag_we             = Signal(False)

        # Refill address is a word address (the two byte-offset bits are dropped).
        refill_addr        = Signal(modbv(0)[LIMIT_WIDTH - 2:])
        refill_valid       = Signal(False)
        n_refill_addr      = Signal(modbv(0)[LIMIT_WIDTH - 2:])
        n_refill_valid     = Signal(False)

        flush_addr         = Signal(modbv(0)[SET_WIDTH:])
        flush_we           = Signal(False)
        n_flush_addr       = Signal(modbv(0)[SET_WIDTH:])
        n_flush_we         = Signal(False)

        @always_comb
        def assignments():
            # Last word of the line: word offset is all ones and the memory
            # acknowledges an active cycle.
            final_fetch.next = (refill_addr[BLOCK_WIDTH - 2:] == modbv(-1)[BLOCK_WIDTH - 2:]) and mem_wbm.ack_i and mem_wbm.stb_o and mem_wbm.cyc_o
            lru_select.next  = lru_pre
            current_lru.next = lru_out
            access_lru.next  = ~miss_w
            busy.next        = state != ic_states.IDLE
            # The flush counter counts down; set 0 is the last one.
            final_flush.next = flush_addr == 0

        @always_comb
        def miss_check():
            """
            For each way, check tag and valid flag, and reduce the vector using AND.
            If the vector is full of ones, the data is not in the cache: assert the miss flag.

            MISS: data not in cache and the memory operation is a valid read. Ignore this if
            the module is flushing data.
            """
            value = modbv(0)[WAYS:]
            for i in range(0, WAYS):
                value[i] = (not tag_out[i][TAGMEM_WAY_VALID] or tag_out[i][TAG_WIDTH:0] != cpu_wbs.addr_i[LIMIT_WIDTH:WAY_WIDTH])
            miss_w.next = value

        @always_comb
        def miss_check_2():
            """
            Vector reduce: check for full miss.
            """
            value = True
            for i in range(0, WAYS):
                value = value and miss_w[i]
            miss_w_and.next = value

        @always_comb
        def miss_check_3():
            """
            Check for valid wishbone cycle, and full miss.
            """
            valid_read = cpu_wbs.cyc_i and cpu_wbs.stb_i and not cpu_wbs.we_i
            miss.next  = miss_w_and and valid_read and not invalidate

        # Plain lists of signals, used inside the generators below.
        trwp_clk    = [tag_rw_port[i].clk for i in range(WAYS)]
        trwp_addr   = [tag_rw_port[i].addr for i in range(WAYS)]
        trwp_data_i = [tag_rw_port[i].data_i for i in range(WAYS)]
        trwp_data_o = [tag_rw_port[i].data_o for i in range(WAYS)]
        trwp_we     = [tag_rw_port[i].we for i in range(WAYS)]

        @always_comb
        def tag_rport():
            # Tag memories: indexed with the set field of the CPU address.
            for i in range(WAYS):
                trwp_clk[i].next    = clk_i
                trwp_addr[i].next   = cpu_wbs.addr_i[WAY_WIDTH:BLOCK_WIDTH]
                trwp_data_i[i].next = tag_in[i]
                trwp_we[i].next     = tag_we
                tag_out[i].next     = trwp_data_o[i]
            # LRU memory
            tag_lru_rw_port.clk.next    = clk_i
            tag_lru_rw_port.data_i.next = lru_in
            lru_out.next                = tag_lru_rw_port.data_o
            tag_lru_rw_port.addr.next   = cpu_wbs.addr_i[WAY_WIDTH:BLOCK_WIDTH]
            tag_lru_rw_port.we.next     = tag_we

        @always_comb
        def next_state_logic():
            n_state.next = state
            if state == ic_states.IDLE:
                if invalidate:
                    # cache flush
                    n_state.next = ic_states.FLUSH
                elif cpu_wbs.cyc_i and not cpu_wbs.we_i:
                    # read request: check the tags
                    n_state.next = ic_states.READ
            elif state == ic_states.READ:
                if not miss:
                    # hit: nothing else to do
                    n_state.next = ic_states.IDLE
                else:
                    # miss: refill line
                    n_state.next = ic_states.FETCH
            elif state == ic_states.FETCH:
                # fetch a line from memory
                if final_fetch:
                    n_state.next = ic_states.IDLE
            elif state == ic_states.FLUSH:
                # invalidate tag memory
                if final_flush:
                    n_state.next = ic_states.FLUSH_LAST
                else:
                    n_state.next = ic_states.FLUSH
            elif state == ic_states.FLUSH_LAST:
                # last cycle for flush
                n_state.next = ic_states.IDLE

        @always(clk_i.posedge)
        def update_state():
            if rst_i:
                # Reset enters FLUSH, so the tag memory is cleared after reset.
                state.next = ic_states.FLUSH
            else:
                state.next = n_state

        @always_comb
        def fetch_fsm():
            n_refill_addr.next  = refill_addr
            n_refill_valid.next = False  # refill_valid

            if state == ic_states.IDLE:
                if invalidate:
                    n_refill_valid.next = False
            elif state == ic_states.READ:
                if miss:
                    # Start at the first word of the missing line.
                    n_refill_addr.next  = concat(cpu_wbs.addr_i[LIMIT_WIDTH:BLOCK_WIDTH], modbv(0)[BLOCK_WIDTH - 2:])
                    n_refill_valid.next = True  # not mem_wbm.ready?
            elif state == ic_states.FETCH:
                n_refill_valid.next = True
                if refill_valid and mem_wbm.ack_i:
                    if final_fetch:
                        n_refill_valid.next = False
                        n_refill_addr.next  = 0
                    else:
                        n_refill_valid.next = True
                        n_refill_addr.next  = refill_addr + modbv(1)[BLOCK_WIDTH - 2:]

        @always(clk_i.posedge)
        def update_fetch():
            if rst_i:
                refill_addr.next  = 0
                refill_valid.next = False
            else:
                refill_addr.next  = n_refill_addr
                refill_valid.next = n_refill_valid

        @always_comb
        def tag_write():
            # Default: write back what was read, no write enable.
            for i in range(0, WAYS):
                tag_in[i].next = tag_out[i]
            tag_we.next = False
            lru_in.next = lru_out

            if state == ic_states.IDLE:
                if invalidate:
                    tag_we.next = False
            elif state == ic_states.READ:
                if miss:
                    # Replace the tag of the way chosen by the LRU unit and mark it valid.
                    for i in range(0, WAYS):
                        if lru_select[i]:
                            tag_in[i].next = concat(True, cpu_wbs.addr_i[LIMIT_WIDTH:WAY_WIDTH])
                            tag_we.next    = True
                else:
                    # Hit: store the updated LRU history.
                    lru_in.next = update_lru
                    tag_we.next = True

        @always_comb
        def flush_next_state():
            n_flush_we.next   = False
            n_flush_addr.next = flush_addr

            if state == ic_states.IDLE:
                if invalidate:
                    # Start from the last set and count down.
                    n_flush_addr.next = modbv(-1)[SET_WIDTH:]
                    n_flush_we.next   = True
            elif state == ic_states.FLUSH:
                n_flush_addr.next = flush_addr - modbv(1)[SET_WIDTH:]
                n_flush_we.next   = True
            elif state == ic_states.FLUSH_LAST:
                n_flush_we.next = False

        @always(clk_i.posedge)
        def update_flush():
            if rst_i:
                flush_addr.next = modbv(-1)[SET_WIDTH:]
                flush_we.next   = False
            else:
                flush_addr.next = n_flush_addr
                flush_we.next   = n_flush_we

        tfp_clk    = [tag_flush_port[i].clk for i in range(WAYS)]
        tfp_addr   = [tag_flush_port[i].addr for i in range(WAYS)]
        tfp_data_i = [tag_flush_port[i].data_i for i in range(WAYS)]
        tfp_we     = [tag_flush_port[i].we for i in range(WAYS)]

        @always_comb
        def tag_flush_port_assign():
            # The flush port only writes zeros (valid bit cleared).
            for i in range(WAYS):
                tfp_clk[i].next    = clk_i
                tfp_addr[i].next   = flush_addr
                tfp_data_i[i].next = modbv(0)[TAGMEM_WAY_WIDTH:]
                tfp_we[i].next     = flush_we
            # connect to the LRU memory
            tag_lru_flush_port.clk.next    = clk_i
            tag_lru_flush_port.addr.next   = flush_addr
            tag_lru_flush_port.data_i.next = modbv(0)[TAG_LRU_WIDTH:]
            tag_lru_flush_port.we.next     = flush_we

        @always_comb
        def cpu_data_assign():
            # cpu data_in assignment: instruction.
            # Way 0 is the default; a way that hits overrides it.
            temp = data_cache[0]
            for i in range(0, WAYS):
                if not miss_w[i]:
                    temp = data_cache[i]
            cpu_wbs.dat_o.next = temp

        @always_comb
        def mem_port_assign():
            # Word address -> byte address.
            mem_wbm.addr_o.next = concat(refill_addr, modbv(0)[2:])
            mem_wbm.dat_o.next  = cpu_wbs.dat_i
            mem_wbm.sel_o.next  = modbv(0)[4:]

        # To Verilog
        crp_clk    = [cache_read_port[i].clk for i in range(0, WAYS)]
        crp_addr   = [cache_read_port[i].addr for i in range(0, WAYS)]
        crp_data_i = [cache_read_port[i].data_i for i in range(0, WAYS)]
        crp_we     = [cache_read_port[i].we for i in range(0, WAYS)]

        @always_comb
        def cache_mem_r():
            # Read-only port: write enable is tied low, data_i is a dummy value.
            for i in range(0, WAYS):
                crp_clk[i].next    = clk_i
                crp_addr[i].next   = cpu_wbs.addr_i[WAY_WIDTH:2]
                crp_data_i[i].next = 0xAABBCCDD
                crp_we[i].next     = False

        # To Verilog
        cup_clk    = [cache_update_port[i].clk for i in range(0, WAYS)]
        cup_addr   = [cache_update_port[i].addr for i in range(0, WAYS)]
        cup_data_i = [cache_update_port[i].data_i for i in range(0, WAYS)]
        cup_we     = [cache_update_port[i].we for i in range(0, WAYS)]

        @always_comb
        def cache_mem_update():
            for i in range(0, WAYS):
                # ignore data_o from update port
                cup_clk[i].next    = clk_i
                cup_addr[i].next   = refill_addr[WAY_WIDTH - 2:]
                cup_data_i[i].next = mem_wbm.dat_i
                cup_we[i].next     = lru_select[i] & mem_wbm.ack_i

        @always_comb
        def wbs_cpu_flags():
            cpu_err.next  = mem_wbm.err_i
            cpu_wait.next = miss_w_and or state != ic_states.READ
            cpu_busy.next = busy

        @always_comb
        def wbm_mem_flags():
            mem_read.next  = refill_valid and not final_fetch
            mem_write.next = False
            mem_rmw.next   = False

        # Remove warnings: Signal is driven but not read
        for i in range(WAYS):
            cache_update_port[i].data_o = None
            tag_flush_port[i].data_o    = None
        tag_lru_flush_port.data_o = None

        # Generate the wishbone interfaces
        wbs_cpu = WishboneSlaveGenerator(clk_i, rst_i, cpu_wbs, cpu_busy, cpu_err, cpu_wait).gen_wbs()  # noqa
        wbm_mem = WishboneMasterGenerator(clk_i, rst_i, mem_wbm, mem_read, mem_write, mem_rmw).gen_wbm()  # noqa

        # Instantiate tag memories
        tag_mem = [RAM_DP(tag_rw_port[i], tag_flush_port[i], A_WIDTH=SET_WIDTH, D_WIDTH=TAGMEM_WAY_WIDTH) for i in range(WAYS)]  # noqa
        tag_lru = RAM_DP(tag_lru_rw_port, tag_lru_flush_port, A_WIDTH=SET_WIDTH, D_WIDTH=TAG_LRU_WIDTH)  # noqa

        # instantiate main memory (cache)
        cache_mem = [RAM_DP(cache_read_port[i], cache_update_port[i], A_WIDTH=WAY_WIDTH - 2, D_WIDTH=D_WIDTH) for i in range(0, WAYS)]  # noqa

        # LRU unit.
        lru_m = CacheLRU(current_lru, access_lru, update_lru, lru_pre, None, NUMWAYS=WAYS)  # noqa

        return instances()
    else:
        # Cache disabled: connect the cpu port to the mem port.
        @always_comb
        def rtl():
            mem.addr.next  = cpu.addr
            mem.dat_o.next = cpu.dat_o
            mem.sel.next   = cpu.sel
            mem.we.next    = cpu.we
            cpu.dat_i.next = mem.dat_i
            cpu.ack.next   = mem.ack
            cpu.err.next   = mem.err

        @always(clk_i.posedge)
        def classic_cycle():
            # cyc/stb are registered and dropped on the cycle after an ack.
            mem.cyc.next = cpu.cyc if not mem.ack else False
            mem.stb.next = cpu.stb if not mem.ack else False

        return instances()
def DCache(clk_i,
           rst_i,
           cpu,
           mem,
           invalidate,
           ENABLE=True,
           D_WIDTH=32,
           BLOCK_WIDTH=5,
           SET_WIDTH=9,
           WAYS=2,
           LIMIT_WIDTH=32):
    """
    The Data Cache module.

    Set-associative cache with per-line valid and dirty bits, placed between
    a CPU Wishbone port and a memory Wishbone port. Accesses with address
    bit 31 set bypass the cache and are forwarded to memory as single
    accesses.

    :param clk_i:       System clock
    :param rst_i:       System reset
    :param cpu:         CPU slave interface (Wishbone Interconnect to master port)
    :param mem:         Memory master interface (Wishbone Interconnect to slave port)
    :param invalidate:  Invalidate the cache
    :param ENABLE:      Build the cache. If False, the cpu port is connected
                        to the mem port and no cache is generated
    :param D_WIDTH:     Data width (only 32 is supported)
    :param BLOCK_WIDTH: Address width for byte access inside a block line
    :param SET_WIDTH:   Address width for line access inside a block
    :param WAYS:        Number of ways for associative cache (power of 2, minimum: 2)
    :param LIMIT_WIDTH: Maximum width for address
    :return:            The MyHDL instances collected with instances()
    """
    if ENABLE:
        assert D_WIDTH == 32, "Error: Unsupported D_WIDTH. Supported values: {32}"
        assert BLOCK_WIDTH > 0, "Error: BLOCK_WIDTH must be a value > 0"
        assert SET_WIDTH > 0, "Error: SET_WIDTH must be a value > 0"
        # The power-of-2 test below also accepts 0 and 1. For those values
        # TAG_LRU_WIDTH is 0, which would ask for a zero-width LRU vector.
        assert WAYS >= 2, "Error: WAYS must be a value >= 2"
        assert not (WAYS & (WAYS - 1)), "Error: WAYS must be a power of 2"

        # --------------------------------------------------------------------------
        WAY_WIDTH        = BLOCK_WIDTH + SET_WIDTH  # cache mem address width
        TAG_WIDTH        = LIMIT_WIDTH - WAY_WIDTH  # tag size
        TAGMEM_WAY_WIDTH = TAG_WIDTH + 2            # Add the valid and dirty bit
        TAGMEM_WAY_VALID = TAGMEM_WAY_WIDTH - 2     # Valid bit index
        TAGMEM_WAY_DIRTY = TAGMEM_WAY_WIDTH - 1     # Dirty bit index
        TAG_LRU_WIDTH    = (WAYS * (WAYS - 1)) >> 1  # (N*(N-1))/2
        # --------------------------------------------------------------------------
        dc_states = enum('IDLE',
                         'SINGLE',
                         'READ',
                         'WRITE',
                         'FETCH',
                         'EVICTING',
                         'FLUSH1',
                         'FLUSH2',
                         'FLUSH3')

        # Tag memories (one per way): lookup/update port and flush port.
        tag_rw_port        = [RAMIOPort(A_WIDTH=SET_WIDTH, D_WIDTH=TAGMEM_WAY_WIDTH) for i in range(WAYS)]
        tag_flush_port     = [RAMIOPort(A_WIDTH=SET_WIDTH, D_WIDTH=TAGMEM_WAY_WIDTH) for i in range(WAYS)]
        # LRU history memory: lookup/update port and flush port.
        tag_lru_rw_port    = RAMIOPort(A_WIDTH=SET_WIDTH, D_WIDTH=TAG_LRU_WIDTH)
        tag_lru_flush_port = RAMIOPort(A_WIDTH=SET_WIDTH, D_WIDTH=TAG_LRU_WIDTH)
        # Data memories (one per way), word addressed: CPU port and refill/evict port.
        cache_read_port    = [RAMIOPort(A_WIDTH=WAY_WIDTH - 2, D_WIDTH=D_WIDTH) for _ in range(0, WAYS)]
        cache_update_port  = [RAMIOPort(A_WIDTH=WAY_WIDTH - 2, D_WIDTH=D_WIDTH) for _ in range(0, WAYS)]
        data_cache         = [cache_read_port[i].data_o for i in range(0, WAYS)]
        data_cache2        = [cache_update_port[i].data_o for i in range(0, WAYS)]

        tag_entry          = Signal(modbv(0)[TAG_WIDTH:])
        tag_in             = [Signal(modbv(0)[TAGMEM_WAY_WIDTH:]) for _ in range(0, WAYS)]
        tag_out            = [Signal(modbv(0)[TAGMEM_WAY_WIDTH:]) for _ in range(0, WAYS)]
        lru_in             = Signal(modbv(0)[TAG_LRU_WIDTH:])
        lru_out            = Signal(modbv(0)[TAG_LRU_WIDTH:])
        tag_we             = Signal(False)

        lru_select         = Signal(modbv(0)[WAYS:])
        current_lru        = Signal(modbv(0)[TAG_LRU_WIDTH:])
        update_lru         = Signal(modbv(0)[TAG_LRU_WIDTH:])
        access_lru         = Signal(modbv(0)[WAYS:])
        lru_pre            = Signal(modbv(0)[WAYS:])

        flush_addr         = Signal(modbv(0)[SET_WIDTH:])
        flush_we           = Signal(False)
        n_flush_addr       = Signal(modbv(0)[SET_WIDTH:])
        n_flush_we         = Signal(False)

        # Word address used for refill and write-back accesses.
        dc_update_addr     = Signal(modbv(0)[LIMIT_WIDTH - 2:])
        evict_data         = Signal(modbv(0)[32:])

        state              = Signal(dc_states.IDLE)
        n_state            = Signal(dc_states.IDLE)

        miss               = Signal(False)
        miss_w             = Signal(modbv(0)[WAYS:])
        miss_w_and         = Signal(False)
        valid              = Signal(False)
        dirty              = Signal(False)
        done               = Signal(False)

        final_flush        = Signal(False)
        final_access       = Signal(False)
        fetch              = Signal(False)
        evict              = Signal(False)

        use_cache          = Signal(False)

        cpu_wbs   = WishboneSlave(cpu)
        mem_wbm   = WishboneMaster(mem)
        cpu_busy  = Signal(False)
        cpu_err   = Signal(False)
        cpu_wait  = Signal(False)
        mem_read  = Signal(False)
        mem_write = Signal(False)
        mem_rmw   = Signal(False)

        @always_comb
        def next_state_logic():
            n_state.next = state
            if state == dc_states.IDLE:
                if invalidate:
                    # flush request
                    n_state.next = dc_states.FLUSH1
                elif cpu_wbs.cyc_i and not cpu_wbs.we_i and not use_cache:
                    # read (uncached)
                    n_state.next = dc_states.SINGLE
                elif cpu_wbs.cyc_i and not cpu_wbs.we_i:
                    # read (cached)
                    n_state.next = dc_states.READ
                elif cpu_wbs.cyc_i and cpu_wbs.we_i and not use_cache:
                    # write (uncached)
                    n_state.next = dc_states.SINGLE
                elif cpu_wbs.cyc_i and cpu_wbs.we_i:
                    # write (cached)
                    n_state.next = dc_states.WRITE
            elif state == dc_states.SINGLE:
                if done:
                    n_state.next = dc_states.IDLE
            elif state == dc_states.READ:
                if not miss:
                    # cache hit
                    n_state.next = dc_states.IDLE
                elif valid and dirty:
                    # cache is valid but dirty
                    n_state.next = dc_states.EVICTING
                else:
                    # cache miss
                    n_state.next = dc_states.FETCH
            elif state == dc_states.WRITE:
                if not miss:
                    # Hit
                    n_state.next = dc_states.IDLE
                elif valid and dirty:
                    # Cache miss. Line is valid but dirty: write back
                    n_state.next = dc_states.EVICTING
                else:
                    n_state.next = dc_states.FETCH
            elif state == dc_states.EVICTING:
                if done:
                    n_state.next = dc_states.FETCH
            elif state == dc_states.FETCH:
                if done:
                    n_state.next = dc_states.IDLE
            elif state == dc_states.FLUSH1:
                n_state.next = dc_states.FLUSH2
            elif state == dc_states.FLUSH2:
                # The three outcomes are mutually exclusive: a dirty line goes
                # to FLUSH3 (write-back), and FLUSH3 checks final_flush itself
                # once it is done. With two independent 'if' statements the
                # FLUSH3 assignment is always overwritten.
                # NOTE(review): verify the FLUSH3 write-back path in simulation.
                if dirty:
                    n_state.next = dc_states.FLUSH3
                elif final_flush:
                    n_state.next = dc_states.IDLE
                else:
                    n_state.next = dc_states.FLUSH1
            elif state == dc_states.FLUSH3:
                if done:
                    if final_flush:
                        n_state.next = dc_states.IDLE
                    else:
                        n_state.next = dc_states.FLUSH1

        @always(clk_i.posedge)
        def update_state():
            if rst_i:
                state.next = dc_states.IDLE
            else:
                state.next = n_state

        @always_comb
        def assignments():
            # Last word of the line: word offset is all ones and the memory
            # acknowledges an active cycle.
            final_access.next = (dc_update_addr[BLOCK_WIDTH - 2:] == modbv(-1)[BLOCK_WIDTH - 2:]) and mem_wbm.ack_i and mem_wbm.cyc_o and mem_wbm.stb_o
            # The flush counter counts down; set 0 is the last one.
            final_flush.next  = flush_addr == 0
            lru_select.next   = lru_pre
            current_lru.next  = lru_out
            access_lru.next   = ~miss_w
            use_cache.next    = not cpu_wbs.addr_i[31]  # Address < 0x8000_0000 use the cache

        @always_comb
        def tag_entry_assign():
            """
            Using the lru history, get the tag entry needed in case of evicting.
            """
            for i in range(0, WAYS):
                if lru_select[i]:
                    tag_entry.next = tag_out[i][TAG_WIDTH:]

        @always_comb
        def done_fetch_evict_assign():
            """
            Flags to indicate current state of the FSM.
            fetch: getting data from memory.
            evict: writing data from cache to memory.
            done:  the last access to memory.
            """
            fetch.next = state == dc_states.FETCH and not final_access
            evict.next = (state == dc_states.EVICTING or state == dc_states.FLUSH3) and not final_access
            done.next  = final_access if use_cache else mem_wbm.ack_i

        @always_comb
        def miss_check():
            """
            For each way, check tag and valid flag, and reduce the vector using AND.
            If the vector is full of ones, the data is not in the cache: assert the miss flag.

            MISS: data not in cache and the memory operation is a valid access. Ignore this if
            the module is flushing data.
            """
            value = modbv(0)[WAYS:]
            for i in range(0, WAYS):
                value[i] = (not tag_out[i][TAGMEM_WAY_VALID] or tag_out[i][TAG_WIDTH:0] != cpu_wbs.addr_i[LIMIT_WIDTH:WAY_WIDTH])
            miss_w.next = value

        @always_comb
        def miss_check_2():
            """
            Vector reduce: check for full miss.
            """
            value = True
            for i in range(0, WAYS):
                value = value and miss_w[i]
            miss_w_and.next = value

        @always_comb
        def miss_check_3():
            """
            Check for valid wishbone cycle, and full miss.
            """
            valid_access = cpu_wbs.cyc_i and cpu_wbs.stb_i and use_cache
            miss.next    = miss_w_and and valid_access and not invalidate

        @always_comb
        def get_valid_n_dirty():
            """
            In case of miss get the valid and dirty flags, needed to detect
            if a evicting must be done first.
            """
            for i in range(0, WAYS):
                if lru_select[i]:
                    valid.next = tag_out[i][TAGMEM_WAY_VALID]
                    dirty.next = tag_out[i][TAGMEM_WAY_DIRTY]

        # Plain lists of signals, used inside the generators below.
        trwp_clk    = [tag_rw_port[i].clk for i in range(WAYS)]
        trwp_addr   = [tag_rw_port[i].addr for i in range(WAYS)]
        trwp_data_i = [tag_rw_port[i].data_i for i in range(WAYS)]
        trwp_data_o = [tag_rw_port[i].data_o for i in range(WAYS)]
        trwp_we     = [tag_rw_port[i].we for i in range(WAYS)]

        @always_comb
        def tag_rport():
            # Tag memories: indexed with the set field of the CPU address.
            for i in range(WAYS):
                trwp_clk[i].next    = clk_i
                trwp_addr[i].next   = cpu_wbs.addr_i[WAY_WIDTH:BLOCK_WIDTH]
                trwp_data_i[i].next = tag_in[i]
                trwp_we[i].next     = tag_we
                tag_out[i].next     = trwp_data_o[i]
            # LRU memory
            tag_lru_rw_port.clk.next    = clk_i
            tag_lru_rw_port.data_i.next = lru_in
            lru_out.next                = tag_lru_rw_port.data_o
            tag_lru_rw_port.addr.next   = cpu_wbs.addr_i[WAY_WIDTH:BLOCK_WIDTH]
            tag_lru_rw_port.we.next     = tag_we

        @always_comb
        def tag_write():
            # Default: write back what was read, no write enable.
            for i in range(0, WAYS):
                tag_in[i].next = tag_out[i]
            tag_we.next = False
            lru_in.next = lru_out

            if state == dc_states.READ or state == dc_states.WRITE:
                if miss:
                    # New line: dirty = 0, valid = 1, tag from the CPU address.
                    for i in range(0, WAYS):
                        if lru_select[i]:
                            tag_in[i].next = concat(False, True, cpu_wbs.addr_i[LIMIT_WIDTH:WAY_WIDTH])
                            tag_we.next    = True
                else:
                    if cpu_wbs.ack_o and cpu_wbs.cyc_i:
                        # Hit: a write sets the dirty bit; store the new LRU history.
                        # NOTE(review): the dirty bit is OR-ed into the way picked by
                        # lru_select, while the data write uses miss_w to pick the
                        # way - confirm both always refer to the same way here.
                        for i in range(0, WAYS):
                            if lru_select[i]:
                                tag_in[i].next = tag_out[i] | (cpu_wbs.we_i << TAGMEM_WAY_DIRTY)  # TODO: Optimize
                        lru_in.next = update_lru
                        tag_we.next = True

        @always_comb
        def flush_next_state():
            n_flush_we.next   = False
            n_flush_addr.next = flush_addr

            if state == dc_states.IDLE:
                if invalidate:
                    # Start from the last set and count down.
                    n_flush_addr.next = modbv(-1)[SET_WIDTH:]
            elif state == dc_states.FLUSH1:
                n_flush_we.next = True
            elif state == dc_states.FLUSH2:
                n_flush_we.next   = False
                n_flush_addr.next = flush_addr - modbv(1)[SET_WIDTH:]

        @always(clk_i.posedge)
        def update_flush():
            if rst_i:
                flush_addr.next = modbv(-1)[SET_WIDTH:]
                flush_we.next   = False
            else:
                flush_addr.next = n_flush_addr
                # A dirty line is not cleared here.
                flush_we.next   = n_flush_we and not dirty

        @always(clk_i.posedge)
        def update_addr_fsm():
            if rst_i:
                dc_update_addr.next = 0
            else:
                if state == dc_states.READ or state == dc_states.WRITE:
                    if miss and not dirty:
                        # Refill: first word of the missing line.
                        dc_update_addr.next = concat(cpu_wbs.addr_i[LIMIT_WIDTH:BLOCK_WIDTH], modbv(0)[BLOCK_WIDTH - 2:])
                    elif miss and dirty:
                        # Write-back: tag of the victim way + set/word of the CPU address.
                        dc_update_addr.next = concat(tag_entry, cpu_wbs.addr_i[WAY_WIDTH:2])
                elif state == dc_states.FLUSH2:
                    if dirty:
                        dc_update_addr.next = concat(tag_entry, modbv(0)[WAY_WIDTH - 2:])
                elif state == dc_states.EVICTING or state == dc_states.FETCH or state == dc_states.FLUSH3:
                    if final_access:
                        dc_update_addr.next = concat(cpu_wbs.addr_i[LIMIT_WIDTH:BLOCK_WIDTH], modbv(0)[BLOCK_WIDTH - 2:])
                    elif mem_wbm.ack_i and mem_wbm.stb_o:
                        dc_update_addr.next = dc_update_addr + modbv(1)[BLOCK_WIDTH - 2:]
                else:
                    dc_update_addr.next = 0

        tfp_clk    = [tag_flush_port[i].clk for i in range(WAYS)]
        tfp_addr   = [tag_flush_port[i].addr for i in range(WAYS)]
        tfp_data_i = [tag_flush_port[i].data_i for i in range(WAYS)]
        tfp_we     = [tag_flush_port[i].we for i in range(WAYS)]

        @always_comb
        def tag_flush_port_assign():
            # The flush port only writes zeros (valid and dirty bits cleared).
            for i in range(WAYS):
                tfp_clk[i].next    = clk_i
                tfp_addr[i].next   = flush_addr
                tfp_data_i[i].next = modbv(0)[TAGMEM_WAY_WIDTH:]
                tfp_we[i].next     = flush_we
            # connect to the LRU memory
            tag_lru_flush_port.clk.next    = clk_i
            tag_lru_flush_port.addr.next   = flush_addr
            tag_lru_flush_port.data_i.next = modbv(0)[TAG_LRU_WIDTH:]
            tag_lru_flush_port.we.next     = flush_we

        @always_comb
        def cpu_data_assign():
            # Way 0 is the default; a way that hits overrides it.
            temp = data_cache[0]
            for i in range(0, WAYS):
                if not miss_w[i]:
                    temp = data_cache[i]
            # Uncached accesses return the memory data directly.
            cpu_wbs.dat_o.next = temp if use_cache else mem_wbm.dat_i

        @always_comb
        def evict_data_assign():
            # Data to write back: read from the way chosen by the LRU unit.
            for i in range(0, WAYS):
                if lru_select[i]:
                    evict_data.next = data_cache2[i]

        @always_comb
        def mem_port_assign():
            mem_wbm.addr_o.next = concat(dc_update_addr, modbv(0)[2:]) if use_cache else cpu_wbs.addr_i
            mem_wbm.dat_o.next  = evict_data if use_cache else cpu_wbs.dat_i
            mem_wbm.sel_o.next  = modbv(0b1111)[4:] if use_cache else cpu_wbs.sel_i

        # To Verilog
        crp_clk    = [cache_read_port[i].clk for i in range(0, WAYS)]
        crp_addr   = [cache_read_port[i].addr for i in range(0, WAYS)]
        crp_data_i = [cache_read_port[i].data_i for i in range(0, WAYS)]
        crp_we     = [cache_read_port[i].we for i in range(0, WAYS)]

        @always_comb
        def cache_mem_rw():
            for i in range(0, WAYS):
                crp_clk[i].next    = clk_i
                crp_addr[i].next   = cpu_wbs.addr_i[WAY_WIDTH:2]
                # Byte-wise merge: selected bytes come from the CPU, the rest
                # keep the value currently stored in the cache.
                crp_data_i[i].next = concat(cpu_wbs.dat_i[32:24] if cpu_wbs.sel_i[3] else data_cache[i][32:24],
                                            cpu_wbs.dat_i[24:16] if cpu_wbs.sel_i[2] else data_cache[i][24:16],
                                            cpu_wbs.dat_i[16:8] if cpu_wbs.sel_i[1] else data_cache[i][16:8],
                                            cpu_wbs.dat_i[8:0] if cpu_wbs.sel_i[0] else data_cache[i][8:0])
                crp_we[i].next     = state == dc_states.WRITE and not miss_w[i] and cpu_wbs.ack_o and cpu_wbs.we_i  # TODO: check for not ACK or ACK?

        # To Verilog
        cup_clk    = [cache_update_port[i].clk for i in range(0, WAYS)]
        cup_addr   = [cache_update_port[i].addr for i in range(0, WAYS)]
        cup_data_i = [cache_update_port[i].data_i for i in range(0, WAYS)]
        cup_we     = [cache_update_port[i].we for i in range(0, WAYS)]

        @always_comb
        def cache_mem_update():
            for i in range(0, WAYS):
                cup_clk[i].next    = clk_i
                cup_addr[i].next   = dc_update_addr[WAY_WIDTH - 2:]
                cup_data_i[i].next = mem_wbm.dat_i
                # Only write while refilling.
                cup_we[i].next     = lru_select[i] and mem_wbm.ack_i and state == dc_states.FETCH

        @always_comb
        def wbs_cpu_flags():
            cpu_err.next  = mem_wbm.err_i
            cpu_wait.next = miss_w_and or not (state == dc_states.READ or state == dc_states.WRITE) if use_cache else not mem_wbm.ack_i
            cpu_busy.next = False

        @always_comb
        def wbm_mem_flags():
            mem_read.next  = fetch if use_cache else not cpu_wbs.we_i and cpu_wbs.cyc_i
            mem_write.next = evict if use_cache else cpu_wbs.we_i and cpu_wbs.cyc_i
            mem_rmw.next   = False

        # Remove warnings: Signal is driven but not read
        for i in range(WAYS):
            tag_flush_port[i].data_o = None
        tag_lru_flush_port.data_o = None

        # Generate the wishbone interfaces
        wbs_cpu = WishboneSlaveGenerator(clk_i, rst_i, cpu_wbs, cpu_busy, cpu_err, cpu_wait).gen_wbs()  # noqa
        wbm_mem = WishboneMasterGenerator(clk_i, rst_i, mem_wbm, mem_read, mem_write, mem_rmw).gen_wbm()  # noqa

        # Instantiate tag memories
        tag_mem = [RAM_DP(tag_rw_port[i], tag_flush_port[i], A_WIDTH=SET_WIDTH, D_WIDTH=TAGMEM_WAY_WIDTH) for i in range(WAYS)]  # noqa
        tag_lru = RAM_DP(tag_lru_rw_port, tag_lru_flush_port, A_WIDTH=SET_WIDTH, D_WIDTH=TAG_LRU_WIDTH)  # noqa

        # Instantiate main memory (Cache)
        cache_mem = [RAM_DP(cache_read_port[i], cache_update_port[i], A_WIDTH=WAY_WIDTH - 2, D_WIDTH=D_WIDTH) for i in range(0, WAYS)]  # noqa

        # LRU unit
        lru_m = CacheLRU(current_lru, access_lru, update_lru, lru_pre, None, NUMWAYS=WAYS)  # noqa

        return instances()
    else:
        # Cache disabled: connect the cpu port to the mem port.
        @always_comb
        def rtl():
            mem.addr.next  = cpu.addr
            mem.dat_o.next = cpu.dat_o
            mem.sel.next   = cpu.sel
            mem.we.next    = cpu.we
            cpu.dat_i.next = mem.dat_i
            cpu.ack.next   = mem.ack
            cpu.err.next   = mem.err

        @always(clk_i.posedge)
        def classic_cycle():
            # cyc/stb are registered and dropped on the cycle after an ack.
            mem.cyc.next = cpu.cyc if not mem.ack else False
            mem.stb.next = cpu.stb if not mem.ack else False

        return instances()