Esempio n. 1
0
  def construct( s, DataType, nregs ):
    """Wrap a register file behind recv/send interfaces.

    DataType -- type of every stored register and of the data messages
    nregs    -- number of registers; addresses are clog2( nregs ) bits wide
    """

    # Addresses only need enough bits to index all nregs entries.
    addr_t = mk_bits( clog2( nregs ) )

    # Interface: write address + write data + read address come in,
    # read data goes out.

    s.recv_waddr = RecvIfcRTL( addr_t )
    s.recv_wdata = RecvIfcRTL( DataType )
    s.recv_raddr = RecvIfcRTL( addr_t )
    s.send_rdata = SendIfcRTL( DataType )

    # Storage (port 0 of each port list is the only one used here)

    s.reg_file   = RegisterFile( DataType, nregs )

    # Structural wiring between the interface messages and the storage

    s.reg_file.raddr[0] //= s.recv_raddr.msg
    s.reg_file.waddr[0] //= s.recv_waddr.msg
    s.reg_file.wdata[0] //= s.recv_wdata.msg
    s.send_rdata.msg    //= s.reg_file.rdata[0]
    # NOTE(review): the write enable is tied to constant 1, so the write
    # port is enabled regardless of recv_wdata.en / recv_waddr.en --
    # confirm this is intended.
    s.reg_file.wen[0]   //= b1( 1 )

    @s.update
    def update_signal():
      # Every receive side is ready exactly when the read-data consumer is.
      s.recv_raddr.rdy = s.send_rdata.rdy
      s.recv_waddr.rdy = s.send_rdata.rdy
      s.recv_wdata.rdy = s.send_rdata.rdy
      # Read data is sent whenever a read address is received.
      s.send_rdata.en  = s.recv_raddr.en
Esempio n. 2
0
    def construct(s, EntryType, num_entries=2):
        """Expose a single-read/single-write register file through raw ports.

        EntryType   -- type of each stored entry and of enq_msg / deq_msg
        num_entries -- number of entries in the backing register file
        """

        # Read and write addresses share one type, wide enough to index
        # every entry.
        AddrType = mk_bits(clog2(num_entries))

        # Interface

        s.enq_msg = InPort(EntryType)
        s.deq_msg = OutPort(EntryType)

        s.wen = InPort(Bits1)
        s.waddr = InPort(AddrType)
        s.raddr = InPort(AddrType)

        # Component: port 0 of each register-file port list is wired straight
        # to the matching port of this component.

        s.queue = RegisterFile(EntryType, num_entries)(
            raddr={0: s.raddr},
            rdata={0: s.deq_msg},
            wen={0: s.wen},
            waddr={0: s.waddr},
            wdata={0: s.enq_msg},
        )
Esempio n. 3
0
    def construct(s, CtrlType, ctrl_mem_size, num_ctrl=4):
        """Build a control memory that cycles through its stored entries.

        CtrlType      -- type of one control entry; must expose a ``ctrl``
                         field (compared against OPT_START below)
        ctrl_mem_size -- number of entries in the backing register file
        num_ctrl      -- upper bound of the ``times`` counter; once the
                         counter reaches it, send_ctrl.en is held low
        """

        # Constant
        # NOTE(review): an ``assert ctrl_mem_size <= num_ctrl`` was disabled
        # here in the original; confirm whether that constraint still applies.
        AddrType = mk_bits(clog2(ctrl_mem_size))
        TimeType = mk_bits(clog2(num_ctrl + 1))
        last_item = AddrType(ctrl_mem_size - 1)

        # Interface

        s.send_ctrl = SendIfcRTL(CtrlType)
        s.recv_waddr = RecvIfcRTL(AddrType)
        s.recv_ctrl = RecvIfcRTL(CtrlType)

        # Component

        s.reg_file = RegisterFile(CtrlType, ctrl_mem_size, 1, 1)
        s.times = Wire(TimeType)

        # Connections

        s.send_ctrl.msg //= s.reg_file.rdata[0]
        s.reg_file.waddr[0] //= s.recv_waddr.msg
        s.reg_file.wdata[0] //= s.recv_ctrl.msg

        # The write enable must be the logical AND of both enables.  Python's
        # ``and`` applied to two port objects at construction time just picks
        # one operand instead of building a gate, so the AND is computed in a
        # combinational update block with the bitwise operator.
        @s.update
        def update_wen():
            s.reg_file.wen[0] = s.recv_ctrl.en & s.recv_waddr.en

        @s.update
        def update_signal():
            # Stop sending once the counter hits num_ctrl or the entry being
            # read is OPT_START; otherwise send whenever the consumer is ready.
            if s.times == TimeType(
                    num_ctrl) or s.reg_file.rdata[0].ctrl == OPT_START:
                s.send_ctrl.en = b1(0)
            else:
                s.send_ctrl.en = s.send_ctrl.rdy
            # Writes are always accepted.
            s.recv_waddr.rdy = b1(1)
            s.recv_ctrl.rdy = b1(1)

        @s.update_ff
        def update_raddr():
            # While the current entry is not OPT_START, count up to num_ctrl
            # and advance the read address, wrapping after the last entry.
            if s.reg_file.rdata[0].ctrl != OPT_START:
                if s.times < TimeType(num_ctrl):
                    s.times <<= s.times + TimeType(1)
                if s.reg_file.raddr[0] < last_item:
                    s.reg_file.raddr[0] <<= s.reg_file.raddr[0] + AddrType(1)
                else:
                    s.reg_file.raddr[0] <<= AddrType(0)
Esempio n. 4
0
    def construct(s, ReqType, RespType, nregs=16):
        """Register-file accelerator behind an xcel minion interface.

        ReqType / RespType -- request / response message types; their
                              ``data`` fields must be the same type
        nregs              -- number of registers in the register file
        """

        # Interface

        s.xcel = XcelMinionIfcRTL(ReqType, RespType)

        # Local parameters: the register width is taken from the request's
        # data field, which has to match the response's data field.

        DataType = ReqType.get_field_type('data')
        assert DataType is RespType.get_field_type('data')

        s.nregs = nregs

        # Components: a one-entry request queue feeds a register file whose
        # read and write ports are both addressed by the dequeued request.

        s.req_q = NormalQueueRTL(ReqType, num_entries=1)
        s.wen = Wire(Bits1)
        s.reg_file = RegisterFile(DataType, nregs)(
            raddr={0: s.req_q.deq.msg.addr},
            rdata={0: s.xcel.resp.msg.data},
            wen={0: s.wen},
            waddr={0: s.req_q.deq.msg.addr},
            wdata={0: s.req_q.deq.msg.data},
        )

        # Requests enter the queue; the response echoes the request type.
        connect(s.xcel.req, s.req_q.enq)
        connect(s.xcel.resp.msg.type_, s.req_q.deq.msg.type_)

        @s.update
        def up_wen():
            # Write only when a request is available and it is a WRITE.
            s.wen = s.req_q.deq.rdy and s.req_q.deq.msg.type_ == XcelMsgType.WRITE

        @s.update
        def up_resp():
            # Dequeue and respond in the same cycle, when both sides can go.
            s.xcel.resp.en = s.req_q.deq.rdy and s.xcel.resp.rdy
            s.req_q.deq.en = s.req_q.deq.rdy and s.xcel.resp.rdy
Esempio n. 5
0
  def construct( s, DataType, data_mem_size, rd_ports=1, wr_ports=1 ):
    """Build a multi-ported data memory on top of a register file.

    DataType      -- type of each stored word and of the data messages
    data_mem_size -- number of words; addresses are clog2( data_mem_size )
                     bits wide
    rd_ports      -- number of read ports (recv_raddr / send_rdata pairs)
    wr_ports      -- number of write ports (recv_waddr / recv_wdata pairs)
    """

    # Constant

    AddrType = mk_bits( clog2( data_mem_size ) )

    # Interface

    s.recv_raddr = [ RecvIfcRTL( AddrType ) for _ in range( rd_ports ) ]
    s.send_rdata = [ SendIfcRTL( DataType ) for _ in range( rd_ports ) ]
    s.recv_waddr = [ RecvIfcRTL( AddrType ) for _ in range( wr_ports ) ]
    s.recv_wdata = [ RecvIfcRTL( DataType ) for _ in range( wr_ports ) ]

    # Component

    s.reg_file   = RegisterFile( DataType, data_mem_size, rd_ports, wr_ports )

    # Connections

    for i in range( rd_ports ):
      s.reg_file.raddr[i] //= s.recv_raddr[i].msg
      s.send_rdata[i].msg //= s.reg_file.rdata[i]

    for i in range( wr_ports ):
      s.reg_file.waddr[i] //= s.recv_waddr[i].msg
      s.reg_file.wdata[i] //= s.recv_wdata[i].msg

    # Each write enable must be the logical AND of the data and address
    # enables.  Python's ``and`` applied to two port objects at construction
    # time just picks one operand instead of building a gate, so the AND is
    # computed in a combinational update block with the bitwise operator.
    @s.update
    def update_wen():
      for i in range( wr_ports ):
        s.reg_file.wen[i] = s.recv_wdata[i].en & s.recv_waddr[i].en

    @s.update
    def update_signal():
      for i in range( rd_ports ):
        # A read address is accepted when its data consumer is ready, and
        # read data is sent whenever a read address arrives.
        s.recv_raddr[i].rdy = s.send_rdata[i].rdy
        s.send_rdata[i].en  = s.recv_raddr[i].en
      for i in range( wr_ports ):
        # Writes are always accepted.
        s.recv_waddr[i].rdy = Bits1( 1 )
        s.recv_wdata[i].rdy = Bits1( 1 )
Esempio n. 6
0
    def construct(s, num_entries, Type):
        """Queue datapath: a register file driven entirely by external control.

        num_entries -- number of entries in the backing register file
        Type        -- type of each entry and of enq_bits / deq_bits
        """

        AddrType = mk_bits(clog2(num_entries))

        # Data in / data out
        s.enq_bits = InPort(Type)
        s.deq_bits = OutPort(Type)

        # Control signal (ctrl -> dpath)
        s.wen = InPort(Bits1)
        s.waddr = InPort(AddrType)
        s.raddr = InPort(AddrType)

        # Queue storage
        s.queue = RegisterFile(Type, num_entries)

        # Connect queue storage: port 0 of every register-file port list is
        # tied to the same-purpose port of this component.
        wiring = (
            (s.queue.raddr[0], s.raddr),
            (s.queue.rdata[0], s.deq_bits),
            (s.queue.wen[0], s.wen),
            (s.queue.waddr[0], s.waddr),
            (s.queue.wdata[0], s.enq_bits),
        )
        for rf_port, outer_port in wiring:
            connect(rf_port, outer_port)
Esempio n. 7
0
    def construct(s, EntryType, num_entries=2):
        """Register-file datapath with a mux that can bypass the storage.

        EntryType   -- type of each stored entry and of enq_msg / deq_msg
        num_entries -- number of entries in the backing register file
        """

        # Read and write addresses share one type, wide enough to index
        # every entry.
        AddrType = mk_bits(clog2(num_entries))

        # Interface

        s.enq_msg = InPort(EntryType)
        s.deq_msg = OutPort(EntryType)

        s.wen = InPort(Bits1)
        s.waddr = InPort(AddrType)
        s.raddr = InPort(AddrType)
        s.mux_sel = InPort(Bits1)

        # Component: storage. Its read data is not exported directly; it
        # feeds input 0 of the output mux below.

        s.queue = RegisterFile(EntryType, num_entries)(
            raddr={0: s.raddr},
            wen={0: s.wen},
            waddr={0: s.waddr},
            wdata={0: s.enq_msg},
        )

        # Output mux: input 0 is the stored entry, input 1 is the incoming
        # message itself.

        s.mux = Mux(EntryType, 2)(
            sel=s.mux_sel,
            in_={0: s.queue.rdata[0], 1: s.enq_msg},
            out=s.deq_msg,
        )
Esempio n. 8
0
    def construct(s, num_cores=1):
        """Declare the ports and build the F/D/X/M/W stages of the datapath.

        num_cores -- constant wired to input 1 of the csrr select mux.

        The method only declares ports/wires and instantiates and connects
        sub-components (registers, muxes, register file, immediate generator,
        ALU, multiplier). ``c_reset_vector``, ``c_reset_inst``, ``RS1`` and
        ``RS2`` are defined outside this method.
        """

        # All datapath values are 32 bits wide.
        dtype = mk_bits(32)
        MemReqType, MemRespType = mk_mem_msg(8, 32, 32)

        #---------------------------------------------------------------------
        # Interface
        #---------------------------------------------------------------------

        # Parameters

        s.core_id = InPort(dtype)

        # imem ports

        s.imemreq_msg = OutPort(MemReqType)
        s.imemresp_msg = InPort(MemRespType)

        # dmem ports

        s.dmemreq_data = OutPort(dtype)
        s.dmemreq_addr = OutPort(dtype)
        s.dmemresp_msg = InPort(MemRespType)

        # mngr ports

        s.mngr2proc_data = InPort(dtype)
        s.proc2mngr_data = OutPort(dtype)

        # xcel ports

        s.xcelreq_addr = OutPort(Bits5)
        s.xcelreq_data = OutPort(Bits32)
        s.xcelresp_msg = InPort(XcelRespMsg)

        # Control signals (ctrl->dpath)

        s.reg_en_F = InPort()
        s.pc_sel_F = InPort(Bits2)

        s.reg_en_D = InPort()
        s.op1_byp_sel_D = InPort(Bits2)
        s.op2_byp_sel_D = InPort(Bits2)
        s.op1_sel_D = InPort()
        s.op2_sel_D = InPort(Bits2)
        s.csrr_sel_D = InPort(Bits2)
        s.imm_type_D = InPort(Bits3)
        s.imul_req_en_D = InPort()
        s.imul_req_rdy_D = OutPort()

        s.reg_en_X = InPort()
        s.alu_fn_X = InPort(Bits4)
        s.ex_result_sel_X = InPort(Bits2)
        s.imul_resp_en_X = InPort()
        s.imul_resp_rdy_X = OutPort()

        s.reg_en_M = InPort()
        s.wb_result_sel_M = InPort(Bits2)

        s.reg_en_W = InPort()
        s.rf_waddr_W = InPort(Bits5)
        s.rf_wen_W = InPort(Bits1)
        s.stats_en_wen_W = InPort(Bits1)

        # Status signals (dpath->Ctrl)

        s.inst_D = OutPort(dtype)
        s.br_cond_eq_X = OutPort()
        s.br_cond_lt_X = OutPort()
        s.br_cond_ltu_X = OutPort()

        # stats_en output

        s.stats_en = OutPort()

        #---------------------------------------------------------------------
        # F stage
        #---------------------------------------------------------------------

        s.pc_F = Wire(dtype)
        s.pc_plus4_F = Wire(dtype)

        # PC+4 incrementer

        s.pc_incr_F = Incrementer(dtype, amount=4)(
            in_=s.pc_F,
            out=s.pc_plus4_F,
        )

        # forward declaration for branch target and jal target
        # (these wires are driven later, in the D and X stages)

        s.br_target_X = Wire(dtype)
        s.jal_target_D = Wire(dtype)
        s.jalr_target_X = Wire(dtype)

        # PC sel mux

        s.pc_sel_mux_F = Mux(dtype, ninputs=4)(
            in_={
                0: s.pc_plus4_F,
                1: s.br_target_X,
                2: s.jal_target_D,
                3: s.jalr_target_X,
            },
            sel=s.pc_sel_F,
        )

        # The instruction fetch request carries the selected next PC; every
        # other field of the request message is zero.
        s.imemreq_msg //= lambda: MemReqType(b4(0), b8(0), s.pc_sel_mux_F.out,
                                             b2(0), b32(0))

        # PC register
        # Resets to (c_reset_vector - 4).

        s.pc_reg_F = RegEnRst(dtype, reset_value=c_reset_vector - 4)(
            en=s.reg_en_F, in_=s.pc_sel_mux_F.out, out=s.pc_F)

        #---------------------------------------------------------------------
        # D stage
        #---------------------------------------------------------------------

        # PC reg in D stage
        # This value is basically passed from F stage for the corresponding
        # instruction to use, e.g. branch to (PC+imm)

        s.pc_reg_D = RegEnRst(dtype)(
            en=s.reg_en_D,
            in_=s.pc_F,
        )

        # Instruction reg

        s.inst_D_reg = RegEnRst(dtype, reset_value=c_reset_inst)(
            en=s.reg_en_D,
            in_=s.imemresp_msg.data,
            out=s.inst_D,  # to ctrl
        )

        # Register File
        # The rf_rdata_D wires, albeit redundant in some sense, are used to
        # remind people these data are from D stage.

        s.rf_rdata0_D = Wire(dtype)
        s.rf_rdata1_D = Wire(dtype)

        s.rf_wdata_W = Wire(dtype)

        # Two read ports addressed by the RS1/RS2 slices of the instruction;
        # one write port driven from the W stage.
        s.rf = RegisterFile(dtype,
                            nregs=32,
                            rd_ports=2,
                            wr_ports=1,
                            const_zero=True)(
                                raddr={
                                    0: s.inst_D[RS1],
                                    1: s.inst_D[RS2],
                                },
                                rdata={
                                    0: s.rf_rdata0_D,
                                    1: s.rf_rdata1_D,
                                },
                                wen={
                                    0: s.rf_wen_W
                                },
                                waddr={
                                    0: s.rf_waddr_W
                                },
                                wdata={
                                    0: s.rf_wdata_W
                                },
                            )

        # Immediate generator

        s.imm_gen_D = ImmGenPRTL()(imm_type=s.imm_type_D, inst=s.inst_D)

        # Bypass data wires; they are driven by the X, M and W stages below.
        s.byp_data_X = Wire(Bits32)
        s.byp_data_M = Wire(Bits32)
        s.byp_data_W = Wire(Bits32)

        # op1 bypass mux

        s.op1_byp_mux_D = Mux(dtype, ninputs=4)(in_={
            0: s.rf_rdata0_D,
            1: s.byp_data_X,
            2: s.byp_data_M,
            3: s.byp_data_W,
        },
                                                sel=s.op1_byp_sel_D)

        # op2 bypass mux

        s.op2_byp_mux_D = Mux(dtype, ninputs=4)(
            in_={
                0: s.rf_rdata1_D,
                1: s.byp_data_X,
                2: s.byp_data_M,
                3: s.byp_data_W,
            },
            sel=s.op2_byp_sel_D,
        )

        # op1 sel mux

        s.op1_sel_mux_D = Mux(dtype, ninputs=2)(
            in_={
                0: s.op1_byp_mux_D.out,
                1: s.pc_reg_D.out,
            },
            sel=s.op1_sel_D,
        )

        # csrr sel mux
        # Input 1 is the construction-time constant num_cores.

        s.csrr_sel_mux_D = Mux(dtype, ninputs=3)(
            in_={
                0: s.mngr2proc_data,
                1: num_cores,
                2: s.core_id,
            },
            sel=s.csrr_sel_D,
        )

        # op2 sel mux
        # This mux chooses among RS2, imm, and the output of the above csrr
        # sel mux. Basically we are using two muxes here for pedagogy.

        s.op2_sel_mux_D = Mux(dtype, ninputs=3)(
            in_={
                0: s.op2_byp_mux_D.out,
                1: s.imm_gen_D.imm,
                2: s.csrr_sel_mux_D.out,
            },
            sel=s.op2_sel_D,
        )

        # Risc-V always calcs branch/jal target by adding imm(generated above) to PC

        s.pc_plus_imm_D = Adder(dtype)(
            in0=s.pc_reg_D.out,
            in1=s.imm_gen_D.imm,
            out=s.jal_target_D,
        )

        #---------------------------------------------------------------------
        # X stage
        #---------------------------------------------------------------------

        # imul
        # Since on the datapath diagram it's slightly left to those registers,
        # I put it at the beginning of the X stage :)

        s.imul = IntMulScycleRTL()

        # The multiplier's response is enqueued into a one-entry bypass queue.
        s.imulresp_q = BypassQueueRTL(Bits32, 1)(enq=s.imul.minion.resp)

        s.imul.minion.req.en //= s.imul_req_en_D
        s.imul.minion.req.rdy //= s.imul_req_rdy_D
        s.imul.minion.req.msg.a //= s.op1_sel_mux_D.out
        s.imul.minion.req.msg.b //= s.op2_sel_mux_D.out

        s.imulresp_q.deq.en //= s.imul_resp_en_X
        s.imulresp_q.deq.rdy //= s.imul_resp_rdy_X

        # br_target_reg_X
        # Since branches are resolved in X stage, we register the target,
        # which is already calculated in D stage, to X stage.

        s.br_target_reg_X = RegEnRst(dtype, reset_value=0)(
            en=s.reg_en_X,
            in_=s.pc_plus_imm_D.out,
            out=s.br_target_X,
        )

        # PC reg in X stage

        s.pc_reg_X = RegEnRst(dtype, reset_value=0)(
            en=s.reg_en_X,
            in_=s.pc_reg_D.out,
        )

        # op1 reg

        s.op1_reg_X = RegEnRst(dtype, reset_value=0)(
            en=s.reg_en_X,
            in_=s.op1_sel_mux_D.out,
        )

        # op2 reg

        s.op2_reg_X = RegEnRst(dtype, reset_value=0)(
            en=s.reg_en_X,
            in_=s.op2_sel_mux_D.out,
        )

        # The accelerator request takes its address from the low 5 bits of
        # op2 and its data from op1.
        s.xcelreq_addr //= s.op2_reg_X.out[0:5]
        s.xcelreq_data //= s.op1_reg_X.out

        # dmemreq write data reg
        # Since the op1 is the base address and op2 is the immediate so that
        # we could utilize ALU to do address calculation, we need one more
        # register to hold the R[rs2] we want to store to memory.

        s.dmem_write_data_reg_X = RegEnRst(dtype, reset_value=0)(
            en=s.reg_en_X,
            in_=s.op2_byp_mux_D.out,  # R[rs2]
            out=s.dmemreq_data,
        )

        # ALU
        # Its comparison outputs go to the control unit as branch conditions
        # and its result also serves as the jalr target.

        s.alu_X = AluPRTL()(
            in0=s.op1_reg_X.out,
            in1=s.op2_reg_X.out,
            fn=s.alu_fn_X,
            ops_eq=s.br_cond_eq_X,
            ops_lt=s.br_cond_lt_X,
            ops_ltu=s.br_cond_ltu_X,
            out=s.jalr_target_X,
        )

        # PC+4 generator

        s.pc_incr_X = Incrementer(dtype, amount=4)(in_=s.pc_reg_X.out)

        # X result sel mux
        # The selected result drives both the X bypass wire and the data
        # memory request address.

        s.ex_result_sel_mux_X = Mux(dtype, ninputs=3)(in_={
            0: s.alu_X.out,
            1: s.imul.minion.resp.msg,
            2: s.pc_incr_X.out,
        },
                                                      sel=s.ex_result_sel_X,
                                                      out=(s.byp_data_X,
                                                           s.dmemreq_addr))

        #---------------------------------------------------------------------
        # M stage
        #---------------------------------------------------------------------

        # Alu execution result reg

        s.ex_result_reg_M = RegEnRst(dtype, reset_value=0)(
            en=s.reg_en_M, in_=s.ex_result_sel_mux_X.out)

        # Writeback result selection mux

        s.wb_result_sel_mux_M = Mux(dtype, ninputs=3)(
            in_={
                0: s.ex_result_reg_M.out,
                1: s.dmemresp_msg.data,
                2: s.xcelresp_msg.data,
            },
            sel=s.wb_result_sel_M,
            out=s.byp_data_M,
        )

        #---------------------------------------------------------------------
        # W stage
        #---------------------------------------------------------------------

        # Writeback result reg
        # Its output fans out to the W bypass wire, the register file write
        # data and the proc2mngr output.

        s.wb_result_reg_W = RegEnRst(dtype, reset_value=0)(
            en=s.reg_en_W,
            in_=s.wb_result_sel_mux_M.out,
            out=(s.byp_data_W, s.rf_wdata_W, s.proc2mngr_data),
        )

        # stats_en
        # Only bit 0 of the registered writeback value is exported.

        s.stats_en_reg_W = RegEnRst(dtype, reset_value=0)(
            en=s.stats_en_wen_W,
            in_=s.wb_result_reg_W.out,
        )
        s.stats_en //= s.stats_en_reg_W.out[0]
    def construct(s, idx_shamt=0):

        #---------------------------------------------------------------------
        # Interface
        #---------------------------------------------------------------------

        # Cache request

        s.cachereq_en = InPort()
        s.cachereq_rdy = OutPort()

        # Cache response

        s.cacheresp_en = OutPort()
        s.cacheresp_rdy = InPort()

        # Memory request

        s.memreq_en = OutPort()
        s.memreq_rdy = InPort()

        # Memory response

        s.memresp_en = InPort()
        s.memresp_rdy = OutPort()

        # control signals (ctrl->dpath)

        s.amo_sel = OutPort(Bits2)
        s.cachereq_enable = OutPort()
        s.memresp_enable = OutPort()
        s.is_refill = OutPort()
        s.tag_array_0_wen = OutPort()
        s.tag_array_0_ren = OutPort()
        s.tag_array_1_wen = OutPort()
        s.tag_array_1_ren = OutPort()
        s.way_sel = OutPort()
        s.way_sel_current = OutPort()
        s.data_array_wen = OutPort()
        s.data_array_ren = OutPort()
        s.skip_read_data_reg = OutPort()

        # width of cacheline divided by number of bits per byte

        s.data_array_wben = OutPort(mk_bits(clw // 8 / 4))
        s.read_data_reg_en = OutPort()
        s.read_tag_reg_en = OutPort()
        s.read_byte_sel = OutPort(mk_bits(clog2(clw // dbw)))
        s.memreq_type = OutPort(Bits4)
        s.cacheresp_type = OutPort(Bits4)
        s.cacheresp_hit = OutPort()

        # status signals (dpath->ctrl)

        s.cachereq_type = InPort(Bits4)
        s.cachereq_addr = InPort(mk_bits(abw))
        s.tag_match_0 = InPort()
        s.tag_match_1 = InPort()

        #----------------------------------------------------------------------
        # State Definitions
        #----------------------------------------------------------------------

        s.STATE_IDLE = b5(0)
        s.STATE_TAG_CHECK = b5(1)
        s.STATE_WRITE_CACHE_RESP_HIT = b5(2)
        s.STATE_WRITE_DATA_ACCESS_HIT = b5(3)
        s.STATE_READ_DATA_ACCESS_MISS = b5(4)
        s.STATE_WRITE_DATA_ACCESS_MISS = b5(5)
        s.STATE_WAIT_HIT = b5(6)
        s.STATE_WAIT_MISS = b5(7)
        s.STATE_REFILL_REQUEST = b5(8)
        s.STATE_REFILL_WAIT = b5(9)
        s.STATE_REFILL_UPDATE = b5(10)
        s.STATE_EVICT_PREPARE = b5(11)
        s.STATE_EVICT_REQUEST = b5(12)
        s.STATE_EVICT_WAIT = b5(13)
        s.STATE_AMO_READ_DATA_ACCESS = b5(14)
        s.STATE_AMO_WRITE_DATA_ACCESS = b5(15)
        s.STATE_INIT_DATA_ACCESS = b5(16)

        #----------------------------------------------------------------------
        # State Transitions
        #----------------------------------------------------------------------

        s.in_go = Wire()
        s.out_go = Wire()
        s.hit_0 = Wire()
        s.hit_1 = Wire()
        s.hit = Wire()
        s.is_read = Wire()
        s.is_write = Wire()
        s.is_init = Wire()
        s.is_amo = Wire()
        s.read_hit = Wire()
        s.write_hit = Wire()
        s.amo_hit = Wire()
        s.miss_0 = Wire()
        s.miss_1 = Wire()
        s.refill = Wire()
        s.evict = Wire()

        @s.update
        def comb_state_transition():
            s.in_go = s.cachereq_en
            s.out_go = s.cacheresp_en
            s.hit_0 = s.is_valid_0 & s.tag_match_0
            s.hit_1 = s.is_valid_1 & s.tag_match_1
            s.hit = s.hit_0 | s.hit_1
            s.is_read = s.cachereq_type == b4(0)
            s.is_write = s.cachereq_type == b4(1)
            s.is_init = s.cachereq_type == b4(2)
            s.is_amo = s.amo_sel != b2(0)
            s.read_hit = s.is_read & s.hit
            s.write_hit = s.is_write & s.hit
            s.amo_hit = s.is_amo & s.hit
            s.miss_0 = ~s.hit_0
            s.miss_1 = ~s.hit_1
            s.refill    = (s.miss_0 & ~s.is_dirty_0 & ~s.lru_way) | \
                          (s.miss_1 & ~s.is_dirty_1 &  s.lru_way)
            s.evict     = (s.miss_0 &  s.is_dirty_0 & ~s.lru_way) | \
                          (s.miss_1 &  s.is_dirty_1 &  s.lru_way)

        # determine amo type

        @s.update
        def comb_amo_type():
            if s.cachereq_type == b4(3): s.amo_sel = b2(1)
            if s.cachereq_type == b4(4): s.amo_sel = b2(2)
            if s.cachereq_type == b4(5): s.amo_sel = b2(3)
            else: s.amo_sel = b2(0)

        #----------------------------------------------------------------------
        # State
        #----------------------------------------------------------------------

        s.state = Wire(Bits5)
        s.next_state = Wire(Bits5)

        @s.update_ff
        def reg_state():
            if s.reset:
                s.state <<= s.STATE_IDLE
            else:
                s.state <<= s.next_state

        @s.update
        def comb_next_state():
            s.next_state = s.state

            if s.state == s.STATE_IDLE:
                if s.in_go: s.next_state = s.STATE_TAG_CHECK

            elif s.state == s.STATE_TAG_CHECK:
                if s.is_init: s.next_state = s.STATE_INIT_DATA_ACCESS
                elif s.read_hit & s.cacheresp_rdy & s.cachereq_en:
                    s.next_state = s.STATE_TAG_CHECK
                elif s.read_hit & s.cacheresp_rdy & ~s.cachereq_en:
                    s.next_state = s.STATE_IDLE
                elif s.read_hit & ~s.cacheresp_rdy:
                    s.next_state = s.STATE_WAIT_HIT
                elif s.write_hit & s.cacheresp_rdy:
                    s.next_state = s.STATE_WRITE_DATA_ACCESS_HIT
                elif s.write_hit & ~s.cacheresp_rdy:
                    s.next_state = s.STATE_WRITE_CACHE_RESP_HIT
                elif s.amo_hit:
                    s.next_state = s.STATE_AMO_READ_DATA_ACCESS
                elif s.refill:
                    s.next_state = s.STATE_REFILL_REQUEST
                elif s.evict:
                    s.next_state = s.STATE_EVICT_PREPARE

            elif s.state == s.STATE_WRITE_CACHE_RESP_HIT:
                if s.cacheresp_rdy:
                    s.next_state = s.STATE_WRITE_DATA_ACCESS_HIT

            elif s.state == s.STATE_WRITE_DATA_ACCESS_HIT:
                if s.cachereq_en: s.next_state = s.STATE_TAG_CHECK
                else: s.next_state = s.STATE_IDLE

            elif s.state == s.STATE_READ_DATA_ACCESS_MISS:
                s.next_state = s.STATE_WAIT_MISS

            elif s.state == s.STATE_WRITE_DATA_ACCESS_MISS:
                if (s.cacheresp_rdy): s.next_state = s.STATE_IDLE
                else: s.next_state = s.STATE_WAIT_MISS

            elif s.state == s.STATE_INIT_DATA_ACCESS:
                s.next_state = s.STATE_WAIT_MISS

            elif s.state == s.STATE_AMO_READ_DATA_ACCESS:
                s.next_state = s.STATE_AMO_WRITE_DATA_ACCESS

            elif s.state == s.STATE_AMO_WRITE_DATA_ACCESS:
                s.next_state = s.STATE_WAIT_MISS

            elif s.state == s.STATE_REFILL_REQUEST:
                if s.memreq_rdy: s.next_state = s.STATE_REFILL_WAIT

            elif s.state == s.STATE_REFILL_WAIT:
                if s.memresp_en: s.next_state = s.STATE_REFILL_UPDATE

            elif s.state == s.STATE_REFILL_UPDATE:
                if s.is_read: s.next_state = s.STATE_READ_DATA_ACCESS_MISS
                elif s.is_write: s.next_state = s.STATE_WRITE_DATA_ACCESS_MISS
                elif s.is_amo: s.next_state = s.STATE_AMO_READ_DATA_ACCESS

            elif s.state == s.STATE_EVICT_PREPARE:
                s.next_state = s.STATE_EVICT_REQUEST

            elif s.state == s.STATE_EVICT_REQUEST:
                if s.memreq_rdy: s.next_state = s.STATE_EVICT_WAIT

            elif s.state == s.STATE_EVICT_WAIT:
                if s.memresp_en: s.next_state = s.STATE_REFILL_REQUEST

            elif s.state == s.STATE_WAIT_HIT:
                if s.out_go: s.next_state = s.STATE_IDLE

            elif s.state == s.STATE_WAIT_MISS:
                if s.out_go: s.next_state = s.STATE_IDLE

        #----------------------------------------------------------------------
        # Valid/Dirty bits record
        #----------------------------------------------------------------------

        s.cachereq_idx = Wire(mk_bits(idw))
        s.valid_bit_in = Wire()
        s.valid_bits_write_en = Wire()
        s.valid_bits_write_en_0 = Wire()
        s.valid_bits_write_en_1 = Wire()
        s.is_valid_0 = Wire()
        s.is_valid_1 = Wire()

        s.cachereq_idx //= s.cachereq_addr[4 + idx_shamt:idw_off + idx_shamt]

        @s.update
        def comb_valid_bits_en():
            s.valid_bits_write_en_0 = s.valid_bits_write_en & ~s.way_sel_current
            s.valid_bits_write_en_1 = s.valid_bits_write_en & s.way_sel_current

        s.valid_bits_0 = RegisterFile(Bits1,
                                      nregs=nblocks // 2,
                                      rd_ports=1,
                                      wr_ports=1,
                                      const_zero=False)(
                                          raddr={
                                              0: s.cachereq_idx
                                          },
                                          rdata={
                                              0: s.is_valid_0
                                          },
                                          wen={
                                              0: s.valid_bits_write_en_0
                                          },
                                          waddr={
                                              0: s.cachereq_idx
                                          },
                                          wdata={
                                              0: s.valid_bit_in
                                          },
                                      )

        s.valid_bits_1 = RegisterFile(Bits1,
                                      nregs=nblocks // 2,
                                      rd_ports=1,
                                      wr_ports=1,
                                      const_zero=False)(
                                          raddr={
                                              0: s.cachereq_idx
                                          },
                                          rdata={
                                              0: s.is_valid_1
                                          },
                                          wen={
                                              0: s.valid_bits_write_en_1
                                          },
                                          waddr={
                                              0: s.cachereq_idx
                                          },
                                          wdata={
                                              0: s.valid_bit_in
                                          },
                                      )

        s.dirty_bit_in = Wire()
        s.dirty_bits_write_en = Wire()
        s.dirty_bits_write_en_0 = Wire()
        s.dirty_bits_write_en_1 = Wire()
        s.is_dirty_0 = Wire()
        s.is_dirty_1 = Wire()

        @s.update
        def comb_cachereq_idx():
            s.dirty_bits_write_en_0 = s.dirty_bits_write_en & ~s.way_sel_current
            s.dirty_bits_write_en_1 = s.dirty_bits_write_en & s.way_sel_current

        s.dirty_bits_0 = RegisterFile(Bits1,
                                      nregs=nblocks // 2,
                                      rd_ports=1,
                                      wr_ports=1,
                                      const_zero=False)(
                                          raddr={
                                              0: s.cachereq_idx
                                          },
                                          rdata={
                                              0: s.is_dirty_0
                                          },
                                          wen={
                                              0: s.dirty_bits_write_en_0
                                          },
                                          waddr={
                                              0: s.cachereq_idx
                                          },
                                          wdata={
                                              0: s.dirty_bit_in
                                          },
                                      )

        s.dirty_bits_1 = RegisterFile(Bits1,
                                      nregs=nblocks // 2,
                                      rd_ports=1,
                                      wr_ports=1,
                                      const_zero=False)(
                                          raddr={
                                              0: s.cachereq_idx
                                          },
                                          rdata={
                                              0: s.is_dirty_1
                                          },
                                          wen={
                                              0: s.dirty_bits_write_en_1
                                          },
                                          waddr={
                                              0: s.cachereq_idx
                                          },
                                          wdata={
                                              0: s.dirty_bit_in
                                          },
                                      )

        s.lru_bit_in = Wire()
        s.lru_bits_write_en = Wire()
        s.lru_way = Wire()

        s.lru_bits = RegisterFile(Bits1,
                                  nregs=nblocks // 2,
                                  rd_ports=1,
                                  wr_ports=1,
                                  const_zero=False)(
                                      raddr={
                                          0: s.cachereq_idx
                                      },
                                      rdata={
                                          0: s.lru_way
                                      },
                                      wen={
                                          0: s.lru_bits_write_en
                                      },
                                      waddr={
                                          0: s.cachereq_idx
                                      },
                                      wdata={
                                          0: s.lru_bit_in
                                      },
                                  )

        #----------------------------------------------------------------------
        # Way selection.
        #   The way is determined in the tag check state, and is
        #   then recorded for the entire transaction
        #----------------------------------------------------------------------

        s.way_record_en = Wire()
        s.way_record_in = Wire()

        @s.update
        def comb_way_select():
            # Candidate way for this transaction.  On a hit the hitting way
            # is used (way 0 is tested first); on a miss the way read from
            # the LRU bits is used.
            if s.hit:
                if s.hit_0:
                    s.way_record_in = b1(0)
                elif s.hit_1:
                    s.way_record_in = b1(1)
                else:
                    s.way_record_in = b1(0)
            else:
                s.way_record_in = s.lru_way

            # During the tag check the freshly computed candidate is used
            # directly; in every other state the registered way is used.
            if s.state == s.STATE_TAG_CHECK:
                s.way_sel_current = s.way_record_in
            else:
                s.way_sel_current = s.way_sel

        s.way_record = RegEnRst(Bits1, reset_value=0)(
            en=s.way_record_en,
            in_=s.way_record_in,
            out=s.way_sel,
        )

        #----------------------------------------------------------------------
        # State Outputs
        #----------------------------------------------------------------------

        # General parameters
        x = b1(0)
        y = b1(1)
        n = b1(0)

        # Parameters for is_refill
        r_x = b1(0)
        r_c = b1(0)  # fill data array from _c_ache
        r_m = b1(1)  # fill data array from _m_em

        # Parameters for memreq_type_mux
        m_x = b4(0)
        m_e = b4(1)
        m_r = b4(0)

        s.tag_array_wen = Wire()
        s.tag_array_ren = Wire()
        s.cacheresp_val = Wire()
        s.memreq_val = Wire()

        # Control signal bit slices

        CS_cachereq_rdy = slice(20, 21)
        CS_cacheresp_val = slice(19, 20)
        CS_memreq_val = slice(18, 19)
        CS_memresp_rdy = slice(17, 18)
        CS_cachereq_enable = slice(16, 17)
        CS_memresp_enable = slice(15, 16)
        CS_is_refill = slice(14, 15)
        CS_read_data_reg_en = slice(13, 14)
        CS_read_tag_reg_en = slice(12, 13)
        CS_memreq_type = slice(8, 12)  # 4 bits
        CS_valid_bit_in = slice(7, 8)
        CS_valid_bits_write_en = slice(6, 7)
        CS_dirty_bit_in = slice(5, 6)
        CS_dirty_bits_write_en = slice(4, 5)
        CS_lru_bits_write_en = slice(3, 4)
        CS_way_record_en = slice(2, 3)
        CS_cacheresp_hit = slice(1, 2)
        CS_skip_read_data_reg = slice(0, 1)

        s.cs = Wire(Bits21)

        @s.update
        def comb_control_table():
            # Decode the current state into the 21-bit control word s.cs and
            # then unpack that word into the individual control signals.
            #
            # Every concat() row lists the fields MSB first, in this order:
            #   cachereq_rdy, cacheresp_val, memreq_val, memresp_rdy,
            #   cachereq_enable, memresp_enable, is_refill,
            #   read_data_reg_en, read_tag_reg_en, memreq_type (4 bits),
            #   valid_bit_in, valid_bits_write_en, dirty_bit_in,
            #   dirty_bits_write_en, lru_bits_write_en, way_record_en,
            #   cacheresp_hit, skip_read_data_reg
            sr = s.state

            if sr == s.STATE_IDLE:
                s.cs = concat(y, n, n, n, y, n, r_x, n, n, m_x, x, n, x, n, n, n, n, n)
            elif sr == s.STATE_TAG_CHECK:
                s.cs = concat(n, n, n, n, n, n, r_x, y, n, m_x, x, n, x, n, y, y, n, y)
            elif sr == s.STATE_WRITE_CACHE_RESP_HIT:
                s.cs = concat(n, y, n, n, n, n, r_x, n, n, m_x, x, n, x, n, y, n, y, n)
            elif sr == s.STATE_WRITE_DATA_ACCESS_HIT:
                s.cs = concat(n, n, n, n, n, n, r_c, n, n, m_x, y, y, y, y, y, n, y, n)
            elif sr == s.STATE_READ_DATA_ACCESS_MISS:
                s.cs = concat(n, n, n, n, n, n, r_x, y, n, m_x, x, n, x, n, y, n, n, n)
            elif sr == s.STATE_WRITE_DATA_ACCESS_MISS:
                s.cs = concat(n, y, n, n, n, n, r_c, n, n, m_x, y, y, y, y, y, n, n, n)
            elif sr == s.STATE_INIT_DATA_ACCESS:
                s.cs = concat(n, n, n, n, n, n, r_c, n, n, m_x, y, y, n, y, y, n, n, n)
            elif sr == s.STATE_AMO_READ_DATA_ACCESS:
                s.cs = concat(n, n, n, n, n, n, r_x, y, n, m_x, x, n, x, n, y, n, n, n)
            elif sr == s.STATE_AMO_WRITE_DATA_ACCESS:
                s.cs = concat(n, n, n, n, n, n, r_c, n, n, m_x, y, y, y, y, y, n, n, n)
            elif sr == s.STATE_REFILL_REQUEST:
                s.cs = concat(n, n, y, n, n, n, r_x, n, n, m_r, x, n, x, n, n, n, n, n)
            elif sr == s.STATE_REFILL_WAIT:
                s.cs = concat(n, n, n, y, n, y, r_m, n, n, m_x, x, n, x, n, n, n, n, n)
            elif sr == s.STATE_REFILL_UPDATE:
                s.cs = concat(n, n, n, n, n, n, r_x, n, n, m_x, y, y, n, y, n, n, n, n)
            elif sr == s.STATE_EVICT_PREPARE:
                s.cs = concat(n, n, n, n, n, n, r_x, y, y, m_x, x, n, x, n, n, n, n, n)
            elif sr == s.STATE_EVICT_REQUEST:
                s.cs = concat(n, n, y, n, n, n, r_x, n, n, m_e, x, n, x, n, n, n, n, n)
            elif sr == s.STATE_EVICT_WAIT:
                s.cs = concat(n, n, n, y, n, n, r_x, n, n, m_x, x, n, x, n, n, n, n, n)
            elif sr == s.STATE_WAIT_HIT:
                s.cs = concat(n, y, n, n, n, n, r_x, n, n, m_x, x, n, x, n, n, n, y, n)
            elif sr == s.STATE_WAIT_MISS:
                s.cs = concat(n, y, n, n, n, n, r_x, n, n, m_x, x, n, x, n, n, n, n, n)
            else:
                # Any state not listed above deasserts every control signal.
                s.cs = concat(n, n, n, n, n, n, r_x, n, n, m_x, x, n, x, n, n, n, n, n)

            # Unpack the control word into the individual signals.

            s.cachereq_rdy = s.cs[CS_cachereq_rdy]
            s.cacheresp_val = s.cs[CS_cacheresp_val]
            s.memreq_val = s.cs[CS_memreq_val]
            s.memresp_rdy = s.cs[CS_memresp_rdy]
            s.cachereq_enable = s.cs[CS_cachereq_enable]
            s.memresp_enable = s.cs[CS_memresp_enable]
            s.is_refill = s.cs[CS_is_refill]
            s.read_data_reg_en = s.cs[CS_read_data_reg_en]
            s.read_tag_reg_en = s.cs[CS_read_tag_reg_en]
            s.memreq_type = s.cs[CS_memreq_type]
            s.valid_bit_in = s.cs[CS_valid_bit_in]
            s.valid_bits_write_en = s.cs[CS_valid_bits_write_en]
            s.dirty_bit_in = s.cs[CS_dirty_bit_in]
            s.dirty_bits_write_en = s.cs[CS_dirty_bits_write_en]
            s.lru_bits_write_en = s.cs[CS_lru_bits_write_en]
            s.way_record_en = s.cs[CS_way_record_en]
            s.cacheresp_hit = s.cs[CS_cacheresp_hit]
            s.skip_read_data_reg = s.cs[CS_skip_read_data_reg]

            # Default response handshake; overridden below for hits.
            s.cacheresp_en = s.cacheresp_val & s.cacheresp_rdy

            # On a read or write hit during the tag check the response is
            # issued from this state: cacheresp_en follows cacheresp_rdy
            # and cacheresp_hit is forced high, overriding the table.
            if (s.read_hit | s.write_hit) and (s.state == s.STATE_TAG_CHECK):
                s.cacheresp_en = s.cacheresp_rdy
                s.cacheresp_hit = b1(1)

                # A read hit can also accept the next cache request as soon
                # as the response side is ready.
                if s.read_hit:
                    s.cachereq_rdy = s.cacheresp_rdy
                    s.cachereq_enable = s.cacheresp_rdy

            # The response was already handled, so a new cache request can
            # be accepted immediately.
            elif s.state == s.STATE_WRITE_DATA_ACCESS_HIT:
                s.cachereq_rdy = b1(1)
                s.cachereq_enable = b1(1)

            s.memreq_en = s.memreq_val & s.memreq_rdy

        # Control bits based on next state

        NS_tag_array_wen = slice(3, 4)
        NS_tag_array_ren = slice(2, 3)
        NS_data_array_wen = slice(1, 2)
        NS_data_array_ren = slice(0, 1)

        s.ns = Wire(Bits4)

        @s.update
        def comb_control_table_next():
            # Decode the NEXT state into the 4-bit word s.ns so that the tag
            # and data array enables are set one cycle early.
            #
            # Field order in every concat() row, MSB first:
            #   tag_array_wen, tag_array_ren, data_array_wen, data_array_ren
            sn = s.next_state

            if sn == s.STATE_IDLE:
                s.ns = concat(n, n, n, n)
            elif sn == s.STATE_TAG_CHECK:
                s.ns = concat(n, y, n, y)
            elif sn == s.STATE_WRITE_CACHE_RESP_HIT:
                s.ns = concat(n, n, n, n)
            elif sn == s.STATE_WRITE_DATA_ACCESS_HIT:
                s.ns = concat(y, n, y, n)
            elif sn == s.STATE_READ_DATA_ACCESS_MISS:
                s.ns = concat(n, n, n, y)
            elif sn == s.STATE_WRITE_DATA_ACCESS_MISS:
                s.ns = concat(y, n, y, n)
            elif sn == s.STATE_INIT_DATA_ACCESS:
                s.ns = concat(y, n, y, n)
            elif sn == s.STATE_AMO_READ_DATA_ACCESS:
                s.ns = concat(n, n, n, y)
            elif sn == s.STATE_AMO_WRITE_DATA_ACCESS:
                s.ns = concat(y, n, y, n)
            elif sn == s.STATE_REFILL_REQUEST:
                s.ns = concat(n, n, n, n)
            elif sn == s.STATE_REFILL_WAIT:
                s.ns = concat(n, n, n, n)
            elif sn == s.STATE_REFILL_UPDATE:
                s.ns = concat(y, n, y, n)
            elif sn == s.STATE_EVICT_PREPARE:
                s.ns = concat(n, y, n, y)
            elif sn == s.STATE_EVICT_REQUEST:
                s.ns = concat(n, n, n, n)
            elif sn == s.STATE_EVICT_WAIT:
                s.ns = concat(n, n, n, n)
            elif sn == s.STATE_WAIT_HIT:
                s.ns = concat(n, n, n, n)
            elif sn == s.STATE_WAIT_MISS:
                s.ns = concat(n, n, n, n)
            else:
                # Any next state not listed above keeps both arrays idle.
                s.ns = concat(n, n, n, n)

            # Unpack the word into the individual array enables.

            s.tag_array_wen = s.ns[NS_tag_array_wen]
            s.tag_array_ren = s.ns[NS_tag_array_ren]
            s.data_array_wen = s.ns[NS_data_array_wen]
            s.data_array_ren = s.ns[NS_data_array_ren]

        # lru bit determination
        @s.update
        def comb_lru_bit_in():
            # The value written into the LRU bits is the complement of the
            # way selected for the current access.
            s.lru_bit_in = ~s.way_sel_current

        # tag array enables
        @s.update
        def comb_tag_arry_en():
            # The shared tag write enable is steered to one array by
            # way_sel_current (low -> array 0, high -> array 1); the shared
            # read enable is passed to both arrays unchanged.
            s.tag_array_0_wen = s.tag_array_wen & ~s.way_sel_current
            s.tag_array_0_ren = s.tag_array_ren
            s.tag_array_1_wen = s.tag_array_wen & s.way_sel_current
            s.tag_array_1_ren = s.tag_array_ren

        # Building data_array_wben
        # This is in control because we want to facilitate more complex patterns
        #   when we want to start supporting subword accesses

        s.cachereq_offset = Wire(Bits2)
        s.wben_decoder_out = Wire(Bits4)

        s.cachereq_offset //= s.cachereq_addr[2:4]
        # Choose byte to read from cacheline based on what the offset was
        s.read_byte_sel //= s.cachereq_addr[2:4]

        @s.update
        def comb_enable_writing():
            # Write enable mask for the data array: a refill enables all
            # four bits so the entire cacheline is written; otherwise only
            # the single bit selected by the request offset is enabled
            # (used for writes and init).
            if s.is_refill:
                s.data_array_wben = b4(0xf)
            else:
                s.data_array_wben = b4(1) << s.cachereq_offset

        # The cache response type is taken directly from the cache request type.

        s.cacheresp_type //= s.cachereq_type
Esempio n. 10
0
  def construct( s ):
    """Build the processor datapath.

    Declares the instruction/data memory, manager and accelerator ports,
    the control inputs (ctrl->dpath) and status outputs (dpath->ctrl),
    then instantiates and wires the components of the F, D, X, M and W
    stages in pipeline order.
    """

    #---------------------------------------------------------------------
    # Interface
    #---------------------------------------------------------------------

    # imem ports
    s.imemreq_addr   = OutPort( Bits32 )
    s.imemresp_data  = InPort ( Bits32 )

    # dmem ports
    s.dmemreq_addr   = OutPort( Bits32 )
    s.dmemreq_data   = OutPort( Bits32 )
    s.dmemresp_data  = InPort ( Bits32 )

    # mngr ports
    s.mngr2proc_data = InPort ( Bits32 )
    s.proc2mngr_data = OutPort( Bits32 )

    # xcel ports
    s.xcelreq_addr   = OutPort( Bits5 )
    s.xcelreq_data   = OutPort( Bits32 )
    s.xcelresp_data  = InPort ( Bits32 )

    # Control signals (ctrl->dpath)

    s.reg_en_F         = InPort ( Bits1 )
    s.pc_sel_F         = InPort ( Bits1 )

    s.reg_en_D         = InPort ( Bits1 )
    s.op1_byp_sel_D    = InPort ( Bits2 )
    s.op2_byp_sel_D    = InPort ( Bits2 )
    s.op2_sel_D        = InPort ( Bits2 )
    s.imm_type_D       = InPort ( Bits3 )

    s.reg_en_X         = InPort ( Bits1 )
    s.alu_fn_X         = InPort ( Bits4 )

    s.reg_en_M         = InPort ( Bits1 )
    s.wb_result_sel_M  = InPort ( Bits2 )

    s.reg_en_W         = InPort ( Bits1 )
    s.rf_waddr_W       = InPort ( Bits5 )
    s.rf_wen_W         = InPort ( Bits1 )

    # Status signals (dpath->Ctrl)

    s.inst_D           = OutPort( Bits32 )
    s.ne_X             = OutPort( Bits1 )

    #---------------------------------------------------------------------
    # F stage
    #---------------------------------------------------------------------

    s.pc_F        = Wire( Bits32 )
    s.pc_plus4_F  = Wire( Bits32 )

    # PC+4 incrementer

    s.pc_incr_F = Incrementer( Bits32, amount=4 )(
      in_ = s.pc_F,
      out = s.pc_plus4_F,
    )

    # Forward declaration of the branch target; it is driven by the
    # branch target register in the X stage below.

    s.br_target_X  = Wire( Bits32 )

    # PC sel mux
    # Chooses between PC+4 and the branch target; its output is the
    # instruction memory request address.

    s.pc_sel_mux_F = Mux( Bits32, 2 )(
      in_ = { 0: s.pc_plus4_F, 1: s.br_target_X },
      sel = s.pc_sel_F,
      out = s.imemreq_addr,
    )

    # PC register
    # NOTE(review): the reset value is c_reset_vector-4, presumably so that
    # the first PC+4 equals the reset vector -- confirm.

    s.pc_reg_F = RegEnRst( Bits32, reset_value=c_reset_vector-4 )(
      en  = s.reg_en_F,
      in_ = s.pc_sel_mux_F.out,
      out = s.pc_F,
    )

    #---------------------------------------------------------------------
    # D stage
    #---------------------------------------------------------------------

    # PC reg in D stage
    # This value is basically passed from F stage for the corresponding
    # instruction to use, e.g. branch to (PC+imm)

    s.pc_reg_D = RegEnRst( Bits32 )( en = s.reg_en_D, in_ = s.pc_F )

    # Instruction reg

    s.inst_D_reg = RegEnRst( Bits32, reset_value=c_reset_inst )(
      en  = s.reg_en_D,
      in_ = s.imemresp_data,
      out = s.inst_D # to ctrl
    )

    # Register File
    # The rf_rdata_D wires, albeit redundant in some sense, are used to
    # remind people these data are from D stage.

    s.rf_rdata0_D = Wire( Bits32 )
    s.rf_rdata1_D = Wire( Bits32 )

    s.rf_wdata_W  = Wire( Bits32 )

    s.rf = RegisterFile( Bits32, nregs=32, rd_ports=2, wr_ports=1, const_zero=True )(
      raddr = { 0: s.inst_D[ RS1 ],
                1: s.inst_D[ RS2 ], },
      rdata = { 0: s.rf_rdata0_D,
                1: s.rf_rdata1_D, },
      wen   = { 0: s.rf_wen_W },
      waddr = { 0: s.rf_waddr_W },
      wdata = { 0: s.rf_wdata_W },
    )

    # Immediate generator

    s.immgen_D = ImmGenRTL()( imm_type = s.imm_type_D, inst = s.inst_D )

    # Bypass values from the X, M and W stages; each is driven further
    # down by the ALU, the writeback mux and the writeback register.

    s.bypass_X = Wire( Bits32 )
    s.bypass_M = Wire( Bits32 )
    s.bypass_W = Wire( Bits32 )

    # op1 bypass mux

    s.op1_byp_mux_D = Mux( Bits32, 4 )(
      in_ = { 0: s.rf_rdata0_D,
              1: s.bypass_X,
              2: s.bypass_M,
              3: s.bypass_W, },
      sel = s.op1_byp_sel_D,
    )

    # op2 bypass mux

    s.op2_byp_mux_D = Mux( Bits32, 4 )(
      in_ = { 0: s.rf_rdata1_D,
              1: s.bypass_X,
              2: s.bypass_M,
              3: s.bypass_W, },
      sel = s.op2_byp_sel_D,
    )

    # op2 sel mux
    # This mux chooses among RS2, imm, and the mngr2proc.
    # Basically we are using two muxes here for pedagogy.

    s.op2_sel_mux_D = Mux( Bits32, 3 )(
      in_ = { 0: s.op2_byp_mux_D.out,
              1: s.immgen_D.imm,
              2: s.mngr2proc_data, },
      sel = s.op2_sel_D,
    )

    # Risc-V always calcs branch target by adding imm(generated above) to PC

    s.pc_plus_imm_D = Adder( Bits32 )(
      in0 = s.pc_reg_D.out,
      in1 = s.immgen_D.imm,
    )

    #---------------------------------------------------------------------
    # X stage
    #---------------------------------------------------------------------

    # br_target_reg_X
    # Since branches are resolved in X stage, we register the target,
    # which is already calculated in D stage, to X stage.

    s.br_target_reg_X = RegEnRst( Bits32, reset_value=0 )(
      en  = s.reg_en_X,
      in_ = s.pc_plus_imm_D.out,
      out = s.br_target_X,
    )

    # op1 reg

    s.op1_reg_X = RegEnRst( Bits32, reset_value=0 )(
      en  = s.reg_en_X,
      in_ = s.op1_byp_mux_D.out,
    )

    # op2 reg

    s.op2_reg_X = RegEnRst( Bits32, reset_value=0 )(
      en  = s.reg_en_X,
      in_ = s.op2_sel_mux_D.out,
    )

    # Send out xcelreq msg
    # op1 carries the data; the low five bits of op2 carry the address.
    s.xcelreq_data //= s.op1_reg_X.out
    s.xcelreq_addr //= s.op2_reg_X.out[0:5]

    # store data reg
    # Since the op1 is the base address and op2 is the immediate so that
    # we could utilize ALU to do address calculation, we need one more
    # register to hold the R[rs2] we want to store to memory.

    s.store_reg_X = RegEnRst( Bits32, reset_value=0 )(
      en  = s.reg_en_X,
      in_ = s.op2_byp_mux_D.out, # R[rs2]
      out = s.dmemreq_data,
    )

    # ALU
    # The ALU result is routed to both the X-stage bypass wire and the
    # data memory request address.

    s.alu_X = AluRTL()(
      in0     = s.op1_reg_X.out,
      in1     = s.op2_reg_X.out,
      fn      = s.alu_fn_X,
      ops_ne  = s.ne_X,
      out     = ( s.bypass_X, s.dmemreq_addr )
    )

    #---------------------------------------------------------------------
    # M stage
    #---------------------------------------------------------------------

    # Alu execution result reg

    s.ex_result_reg_M = RegEnRst( Bits32, reset_value=0 )(
      en  = s.reg_en_M,
      in_ = s.alu_X.out
    )

    # Writeback result selection mux
    # Chooses among the registered ALU result, the data memory response
    # and the accelerator response; the output is also the M-stage bypass.

    s.wb_result_sel_mux_M = Mux( Bits32, 3 )(
      in_ = { 0: s.ex_result_reg_M.out,
              1: s.dmemresp_data,
              2: s.xcelresp_data, },
      sel = s.wb_result_sel_M,
      out = s.bypass_M,
    )

    #---------------------------------------------------------------------
    # W stage
    #---------------------------------------------------------------------

    # Writeback result reg
    # Its output is routed to the W-stage bypass wire, the register file
    # write data and the proc2mngr port.

    s.wb_result_reg_W = RegEnRst( Bits32, reset_value=0 )(
      en  = s.reg_en_W,
      in_ = s.wb_result_sel_mux_M.out,
      out = ( s.bypass_W, s.rf_wdata_W, s.proc2mngr_data ),
    )