def __init__( s, num_banks=0 ):
  """Blocking-cache base model: declares proc-side and mem-side val/rdy
  interfaces and instantiates the ctrl/dpath submodules.

  num_banks: number of cache banks; 0 disables banking.
  """

  # Parameters
  # Shift amount used to skip over the bank-index bits in the address.
  idx_shamt = clog2( num_banks ) if num_banks > 0 else 0
  size            = 256   # 256 bytes
  opaque_nbits    = 8     # 8-bit opaque field
  addr_nbits      = 32    # 32-bit address
  data_nbits      = 32    # 32-bit data access
  cacheline_nbits = 128   # 128-bit cacheline

  #---------------------------------------------------------------------
  # Interface
  #---------------------------------------------------------------------

  # Proc <-> Cache
  s.cachereq  = InValRdyBundle ( MemReqMsg(opaque_nbits, addr_nbits, data_nbits) )
  s.cacheresp = OutValRdyBundle( MemRespMsg(opaque_nbits, data_nbits) )

  # Cache <-> Mem
  s.memreq  = OutValRdyBundle( MemReqMsg(opaque_nbits, addr_nbits, cacheline_nbits) )
  s.memresp = InValRdyBundle ( MemRespMsg(opaque_nbits, cacheline_nbits) )

  # Control / datapath submodules. NOTE(review): only the instantiation is
  # visible in this chunk; the ctrl<->dpath wiring presumably happens
  # elsewhere (cf. the Alt model's connect_auto) -- confirm.
  s.ctrl  = BlockingCacheBaseCtrlPRTL ( idx_shamt )
  s.dpath = BlockingCacheBaseDpathPRTL( idx_shamt )
def __init__(s, nports, src_msgs, sink_msgs, stall_prob, latency, src_delay, sink_delay):
  """Test harness: one TestSource/TestSink pair per port around a CL test memory."""
  assert nports <= 2  # harness only supports 1- or 2-port memories

  # Message types: 8-bit opaque, 32-bit address, 32-bit data
  req  = MemReqMsg(8, 32, 32)
  resp = MemRespMsg(8, 32)

  # Instantiate models: per-port sources and sinks around one memory
  s.srcs  = [TestSource(req, src_msgs[port]) for port in xrange(nports)]
  s.mem   = TestMemoryCL(nports, [req] * nports, [resp] * nports)
  s.sinks = [TestSink(resp, sink_msgs[port]) for port in xrange(nports)]

  # Bind each source's send method to the memory request port and each
  # sink's recv method to the memory response port.
  for port in xrange(nports):
    s.srcs[port].send  |= s.mem.ifcs[port].req
    s.sinks[port].recv |= s.mem.ifcs[port].resp
def __init__( s, nports, src_msgs, sink_msgs, stall_prob, latency, src_delay, sink_delay ):
  """Test harness: val/rdy sources and sinks around an RTL test memory."""

  # Message types: 8-bit opaque, 32-bit address, 32-bit data
  req  = MemReqMsg(8, 32, 32)
  resp = MemRespMsg(8, 32)

  # Instantiate models: per-port sources and sinks around one memory
  s.srcs  = [TestSourceValRdy(req, src_msgs[port]) for port in xrange(nports)]
  s.mem   = TestMemoryRTL(nports, [req] * nports, [resp] * nports)
  s.sinks = [TestSinkValRdy(resp, sink_msgs[port]) for port in xrange(nports)]

  # Structurally connect each source/sink to its memory port
  for port in xrange(nports):
    s.connect(s.srcs[port].out, s.mem.reqs[port])
    s.connect(s.sinks[port].in_, s.mem.resps[port])
def __init__(s):
  """PE array: chains nPE processing elements through single-element bypass
  queues (one per direction per adjacent pair) and fans out per-PE control,
  memory, and FSM-response interfaces.

  Relies on module-level constants nPE and nWid defined elsewhere in the file.
  """

  # interface
  s.in_control = InValRdyBundle[nPE](inst_msg())           # control word per PE
  s.ocmreqs    = OutValRdyBundle[nPE](MemReqMsg(8, 32, nWid))  # per-PE mem requests
  s.ocmresps   = InValRdyBundle[nPE](MemRespMsg(8, nWid))      # per-PE mem responses
  s.out_fsm    = OutValRdyBundle[nPE](1)                   # response to FSM

  # PEs
  s.pe = PeRTL[nPE]()
  # Two queues per adjacent PE pair: 2i carries PE[i]->PE[i+1],
  # 2i+1 carries PE[i+1]->PE[i].
  s.queue = SingleElementBypassQueue[2 * (nPE - 1)](nWid)

  # Neighbor links between adjacent PEs
  for i in range(nPE - 1):
    s.connect(s.pe[i].out_neighbor[0], s.queue[2 * i].enq)
    s.connect(s.queue[2 * i].deq, s.pe[i + 1].in_neighbor[1])
    s.connect(s.pe[i + 1].out_neighbor[1], s.queue[2 * i + 1].enq)
    s.connect(s.queue[2 * i + 1].deq, s.pe[i].in_neighbor[0])

  for x in range(nPE):
    s.connect(s.in_control[x], s.pe[x].in_control)
    s.connect(s.out_fsm[x], s.pe[x].out_fsm)
    # Connections from PE memreq and memresp ports to Dpath ports
    s.connect(s.ocmreqs[x], s.pe[x].ocmreqs)
    s.connect(s.ocmresps[x], s.pe[x].ocmresps)
def test_BitStructs():
  """Demonstrate that BitStruct messages can be read both via bit slices and
  via named fields, and that the two views agree (Python 2 / PyMTL test)."""

  class ExampleModel( Model ):
    def __init__( s ):

      # MemReqMsg(addr_nbits, data_nbits) is a BitStruct datatype:
      #
      # +------+-----------+------+-----------+
      # | type |   addr    | len  |   data    |
      # +------+-----------+------+-----------+

      dtype = MemReqMsg( 32, 32 )
      s.in_ = InPort( dtype )

      @s.tick
      def logic():
        # BitStructs are subclasses of Bits, we can slice them
        addr, data = s.in_[34:66], s.in_[0:32]
        # ... but it's usually more convenient to use fields!
        addr, data = s.in_.addr, s.in_.data
        assert s.in_[34:66] == s.in_.addr
        assert s.in_[ 0:32] == s.in_.data
        print addr, data

  model = ExampleModel()
  model.elaborate()
  sim = SimulationTool( model )
  # NOTE(review): argument order follows MemReqMsg.mk_msg -- confirm against
  # its definition (presumably type, addr, len, data).
  model.in_.value = MemReqMsg( 32, 32 ).mk_msg( 1, 8, 2, 5 )
  print
  sim.cycle()
  sim.cycle()
  sim.cycle()
def __init__( s, nbits = 8, nports = 1, bw = 32, n = 8, ):
  """Verilog-import wrapper for the pageRank accelerator: declares PyMTL
  val/rdy bundles and maps them onto the Verilog module's ports."""

  #---------------------------------------------------------------------------
  # Interface
  #---------------------------------------------------------------------------

  s.direq   = InValRdyBundle ( pageRankReqMsg() )
  s.diresp  = OutValRdyBundle ( pageRankRespMsg() )
  s.memreq  = OutValRdyBundle ( MemReqMsg(8,32,32) )
  s.memresp = InValRdyBundle ( MemRespMsg(8,32) )

  #---------------------------------------------------------------------------
  # Verilog import setup
  #---------------------------------------------------------------------------

  # verilog parameters
  s.set_params({ 'nbits' : nbits, 'nports' : nports, 'bw' : bw, 'n' : n })

  # verilog ports -- the commented-out entries document the old port naming
  s.set_ports({
    'clk'          : s.clk,
    'reset'        : s.reset,
    # 'in_req_val'   : s.direq.val,
    # 'out_req_rdy'  : s.direq.rdy,
    # 'in_type'      : s.direq.msg.type_,
    # 'in_addr'      : s.direq.msg.addr,
    # 'in_data'      : s.direq.msg.data,
    # 'out_resp_val' : s.diresp.val,
    # 'in_resp_rdy'  : s.diresp.rdy,
    # 'out_type'     : s.diresp.msg.type_,
    # 'out_data'     : s.diresp.msg.data,
    'pr_req_msg'   : s.direq.msg,
    'pr_req_val'   : s.direq.val,
    'pr_req_rdy'   : s.direq.rdy,
    'pr_resp_msg'  : s.diresp.msg,
    'pr_resp_val'  : s.diresp.val,
    'pr_resp_rdy'  : s.diresp.rdy,
    'mem_req_msg'  : s.memreq.msg,
    'mem_req_val'  : s.memreq.val,
    'mem_req_rdy'  : s.memreq.rdy,
    'mem_resp_msg' : s.memresp.msg,
    'mem_resp_val' : s.memresp.val,
    'mem_resp_rdy' : s.memresp.rdy,
  })
def __init__(s, nbits=32, nports=2, n=8):
  """Verilog-import wrapper for the two-memory-port pageRank accelerator.
  NOTE(review): the port map below hardcodes ports 0 and 1, so nports is
  effectively fixed at 2 -- confirm before passing other values."""

  #---------------------------------------------------------------------------
  # Interface
  #---------------------------------------------------------------------------

  s.direq   = InValRdyBundle(pageRankReqMsg())
  s.diresp  = OutValRdyBundle(pageRankRespMsg())
  s.memreq  = [ OutValRdyBundle(MemReqMsg(8, 32, 32)) for _ in range(nports) ]
  s.memresp = [InValRdyBundle(MemRespMsg(8, 32)) for _ in range(nports)]

  #---------------------------------------------------------------------------
  # Verilog import setup
  #---------------------------------------------------------------------------

  # verilog parameters
  s.set_params({'nbits': nbits, 'nports': nports, 'n': n})

  # verilog ports -- the commented-out entries document the old port naming
  s.set_ports({
    'clk'           : s.clk,
    'reset'         : s.reset,
    # 'in_req_val'   : s.direq.val,
    # 'out_req_rdy'  : s.direq.rdy,
    # 'in_type'      : s.direq.msg.type_,
    # 'in_addr'      : s.direq.msg.addr,
    # 'in_data'      : s.direq.msg.data,
    # 'out_resp_val' : s.diresp.val,
    # 'in_resp_rdy'  : s.diresp.rdy,
    # 'out_type'     : s.diresp.msg.type_,
    # 'out_data'     : s.diresp.msg.data,
    'in_req_msg'    : s.direq.msg,
    'in_req_val'    : s.direq.val,
    'in_req_rdy'    : s.direq.rdy,
    'out_resp_msg'  : s.diresp.msg,
    'out_resp_val'  : s.diresp.val,
    'out_resp_rdy'  : s.diresp.rdy,
    'mem_req0_msg'  : s.memreq[0].msg,
    'mem_req0_val'  : s.memreq[0].val,
    'mem_req0_rdy'  : s.memreq[0].rdy,
    'mem_resp0_msg' : s.memresp[0].msg,
    'mem_resp0_val' : s.memresp[0].val,
    'mem_resp0_rdy' : s.memresp[0].rdy,
    'mem_req1_msg'  : s.memreq[1].msg,
    'mem_req1_val'  : s.memreq[1].val,
    'mem_req1_rdy'  : s.memreq[1].rdy,
    'mem_resp1_msg' : s.memresp[1].msg,
    'mem_resp1_val' : s.memresp[1].val,
    'mem_resp1_rdy' : s.memresp[1].rdy,
  })
def __init__( s, nports = 1, reqs = None, resps = None, mem_nbytes=1<<20 ):
  """Cycle-level test memory with up to two method-based ports.

  nports     : number of memory ports (only <=2 supported)
  reqs/resps : per-port request/response message types; default is one
               8-bit-opaque/32-bit-addr/32-bit-data port
  mem_nbytes : size of the backing functional memory

  Fixes vs. original: avoid mutable default arguments (same effective
  defaults) and stop shadowing the builtin ``len`` inside the update block.
  """
  if reqs is None:
    reqs = [ MemReqMsg(8,32,32) ]
  if resps is None:
    resps = [ MemRespMsg(8,32) ]

  s.mem  = TestMemoryFL(mem_nbytes)
  s.ifcs = [MemIfcCL((reqs[i], resps[i])) for i in xrange(nports)]
  s.reqs  = reqs
  s.resps = resps
  s.req  = [ValidEntry(False, None)] * nports  # pending request per port
  s.resp = [ValidEntry(False, None)] * nports  # last response, for line trace
  s.nports = nports

  # Currently, only <=2 ports: method ports must be bound by name
  if nports >= 1:
    s.ifcs[0].req.enq |= s.recv0
    s.ifcs[0].req.rdy |= s.recv_rdy0
  if nports >= 2:
    s.ifcs[1].req.enq |= s.recv1
    s.ifcs[1].req.rdy |= s.recv_rdy1

  @s.update
  def up_testmem():
    for i in xrange(nports):
      entry = s.req[i]
      s.resp[i] = ValidEntry(False, None)

      if s.ifcs[i].resp.rdy() and entry.val:
        req = entry.msg
        # len == 0 encodes a full-data-width access
        nbytes = req.len if req.len else (reqs[i].data.nbits >> 3)

        if req.type_ == MemReqMsg.TYPE_READ:
          resp = resps[i].mk_rd(req.opaque, nbytes, s.mem.read(req.addr, nbytes))
        elif req.type_ == MemReqMsg.TYPE_WRITE:
          s.mem.write(req.addr, nbytes, req.data)
          resp = resps[i].mk_wr(req.opaque)
        else:  # AMOs
          resp = resps[i].mk_msg( req.type_, req.opaque, 0, nbytes, \
                                  s.mem.amo( req.type_, req.addr, nbytes, req.data ))

        s.ifcs[i].resp.enq(resp)
        s.req[i]  = ValidEntry(False, None)  # clear pending req
        s.resp[i] = ValidEntry(True, resp)   # for line trace

  for i in xrange(nports):
    s.add_constraints(
      U(up_testmem) < M( s.ifcs[i].req.enq),  # pipe behavior, send < recv
      U(up_testmem) < M(s.ifcs[i].req.rdy),
    )
def req( type_, opaque, addr, len, data ):
  """Build a 128-bit-data memory request message.

  type_  : 'rd' (read), 'wr' (write), or 'in' (write-init)
  opaque : 8-bit opaque tag echoed in the response
  addr   : 32-bit address
  len    : byte length (0 encodes a full-width access)
  data   : payload for writes

  Raises ValueError on an unknown type_ (the original silently left the
  type field unset).
  """
  msg = MemReqMsg(8,32,128)

  if   type_ == 'rd': msg.type_ = MemReqMsg.TYPE_READ
  elif type_ == 'wr': msg.type_ = MemReqMsg.TYPE_WRITE
  elif type_ == 'in': msg.type_ = MemReqMsg.TYPE_WRITE_INIT
  else:
    raise ValueError( "unknown memory request type: %r" % (type_,) )

  msg.addr   = addr
  msg.opaque = opaque
  msg.len    = len
  msg.data   = data
  return msg
def __init__(s, src_msgs, sink_msgs, stall_prob, latency, src_delay, sink_delay, dump_vcd):
  """Four-bank cache-network test harness: sorts the reference messages by
  bank so each per-bank sink checks only its own traffic."""

  # Here we assume 16B cacheline, so the bank bits are slice(4, 6)
  # +--------------------------+--------------+--------+--------+--------+
  # |           22b            |      4b      |   2b   |   2b   |   2b   |
  # |           tag            |    index     |bank idx| offset | subwd  |
  # +--------------------------+--------------+--------+--------+--------+
  #
  # NOTE(review): the diagram above implies bank bits [4:6], but the code
  # below slices addr[5:7] -- one of the two is off by one; confirm which.
  src_msg = [[], [], [], []]
  sink_msg = [[], [], [], []]

  # Bucket each src/sink message pair by the bank index of the request addr.
  for i in xrange(len(src_msgs)):
    src_msg[src_msgs[i].addr[5:7].uint()].append(src_msgs[i])
    sink_msg[src_msgs[i].addr[5:7].uint()].append(sink_msgs[i])

  # Instantiate models
  s.src = [
    TestSource(MemReqMsg(8, 32, 32), src_msg[i], src_delay) for i in xrange(4)
  ]
  s.cachenet = CacheNetRTL()
  s.mem = TestMemory(MemMsg(8, 32, 32), 4, stall_prob, latency)
  s.sink = [
    TestNetCacheSink(MemRespMsg(8, 32), sink_msg[i], sink_delay) for i in xrange(4)
  ]

  # Dump VCD
  if dump_vcd:
    s.cachenet.vcd_file = dump_vcd
    if hasattr(s.cachenet, 'inner'):
      s.cachenet.inner.vcd_file = dump_vcd
    # s.cache.vcd_file = dump_vcd
    # if hasattr(s.cache, 'inner'):
    #   s.cache.inner.vcd_file = dump_vcd

  # Connect sources/sinks to the processor side of the cache network
  for i in xrange(4):
    s.connect(s.src[i].out, s.cachenet.procreq[i])
    s.connect(s.sink[i].in_, s.cachenet.procresp[i])

  # Connect the memory side of the cache network to the test memory
  for i in xrange(4):
    s.connect(s.cachenet.cachereq[i], s.mem.reqs[i])
    s.connect(s.cachenet.cacheresp[i], s.mem.resps[i])
def __init__( s, idx_shamt ):
  """Interface declaration for the blocking-cache datapath (2-way variant:
  the index is one bit narrower than the direct-mapped version).

  idx_shamt: extra shift applied to the index when the cache is banked.
  """

  # Parameters
  o   = 8    # Short name for opaque bitwidth
  abw = 32   # Short name for addr bitwidth
  dbw = 32   # Short name for data bitwidth
  clw = 128  # Short name for cacheline bitwidth
  nbl = 16   # Short name for number of cache blocks, 256*8/128 = 16
  idw = 3    # Short name for index width, clog2(16)-1 = 3 (-1 for 2-way)
  ofw = 4    # Short name for offset bit width, clog2(128/8) = 4
  nby = 16   # Short name for number of cache blocks per way, 16/1 = 16

  # In the lab, to simplify things, we always use all bits except for
  # the offset bits to represent the tag instead of storing the 25-bit
  # tag and concatenate everytime with the index bits and even the bank
  # bits to get the address of a cacheline
  tgw = 28   # Short name for tag bit width, 32-4 = 28

  #---------------------------------------------------------------------
  # Interface
  #---------------------------------------------------------------------

  # Cache request
  s.cachereq_msg = InPort ( MemReqMsg(o, abw, dbw) )

  # Cache response
  s.cacheresp_msg = OutPort( MemRespMsg(o, dbw) )

  # Memory request
  s.memreq_msg = OutPort( MemReqMsg(o, abw, clw) )

  # Memory response
  s.memresp_msg = InPort ( MemRespMsg(o, clw) )
def __init__( s, nports = 1, req_types = None, resp_types = None, mem_nbytes=1<<20 ):
  """RTL test memory: one val/rdy request/response port pair per port, with
  a single-entry pipe queue on each response path.

  nports     : number of memory ports
  req_types  : per-port request message types (default: MemReqMsg(8,32,32))
  resp_types : per-port response message types (default: MemRespMsg(8,32))
  mem_nbytes : size of the backing functional memory

  Fixes vs. original: avoid mutable default arguments (same effective
  defaults) and stop shadowing the builtin ``len`` in the update block.
  """
  if req_types is None:
    req_types = [ MemReqMsg(8,32,32) ]
  if resp_types is None:
    resp_types = [ MemRespMsg(8,32) ]

  s.mem = TestMemory(mem_nbytes)

  s.reqs    = [InValRdyIfc(req_types[i]) for i in xrange(nports)]
  s.resps   = [OutValRdyIfc(resp_types[i]) for i in xrange(nports)]
  s.resp_qs = [PipeQueue1RTL(resp_types[i]) for i in xrange(nports)]

  for i in xrange(nports):
    s.connect(s.resps[i], s.resp_qs[i].deq)

  s.req_types  = req_types
  s.resp_types = resp_types
  s.nports     = nports

  @s.update
  def up_set_rdy():
    # Accept a request only when the response queue can take the result.
    for i in xrange(nports):
      s.reqs[i].rdy = s.resp_qs[i].enq.rdy

  @s.update
  def up_process_memreq():
    for i in xrange(nports):
      s.resp_qs[i].enq.val = Bits1(0)

      if s.reqs[i].val & s.resp_qs[i].enq.rdy:
        req = s.reqs[i].msg
        # len == 0 encodes a full-data-width access
        nbytes = req.len if req.len else ( s.reqs[i].msg.data.nbits >> 3)

        if req.type_ == MemReqMsg.TYPE_READ:
          resp = s.resp_types[i].mk_rd(req.opaque, nbytes, s.mem.read(req.addr, nbytes))
        elif req.type_ == MemReqMsg.TYPE_WRITE:
          s.mem.write(req.addr, nbytes, req.data)
          resp = s.resp_types[i].mk_wr(req.opaque)
        else:  # AMOs
          resp = s.resp_types[i].mk_msg( req.type_, req.opaque, 0, nbytes, \
                                         s.mem.amo( req.type_, req.addr, nbytes, req.data ))

        s.resp_qs[i].enq.val = Bits1(1)
        s.resp_qs[i].enq.msg = resp
def __init__(s):
  """Single processing element: control/memory/neighbor interfaces, queues,
  a local register file, and an approximate multiplier.

  Relies on module-level constants nWid, nMemInst, nDesField, nReg defined
  elsewhere in the file.
  """

  # interface
  s.in_control = InValRdyBundle(inst_msg())  # control word
  s.memresp = InValRdyBundle(MemMsg(32, nWid).resp)
  # 0,1: right/left neighbors
  # 2,3: length-2 right/left PEs
  # 4,5: length-4 right/left PEs
  s.in_neighbor = InValRdyBundle[6](nWid)
  s.memreq = OutValRdyBundle(MemMsg(32, nWid).req)
  s.out_fsm = OutValRdyBundle(1)  # response to FSM
  s.out_neighbor = OutValRdyBundle[6](nWid)

  # Queues
  # NOTE(review): MemReqMsg is constructed with two args here vs. three
  # elsewhere in the file -- presumably a different MemReqMsg; confirm.
  s.memreq_q = SingleElementBypassQueue(MemReqMsg(32, nWid))
  s.connect(s.memreq, s.memreq_q.deq)
  #s.memresp_q = SingleElementPipelinedQueue( MemRespMsg(16) )
  s.memresp_q = SingleElementBypassQueue(MemRespMsg(nWid))
  s.connect(s.memresp, s.memresp_q.enq)

  # temporarily store destination for non-blocking loads
  s.memdes_q = NormalQueue(nMemInst, nDesField)

  # PE local register file
  s.rf = RegisterFile(nWid, nReg, 2)  # 2 read ports

  # approx_mul
  # input is done by reqMsg(src0, src1) done in control plane.
  s.approx_mul = CombinationalApproxMult(nWid / 2, 4)
  s.mul_msg = MymultReqMsg(nWid / 2)

  # temporary variables
  s.src0_tmp = Wire(nWid)
  s.src1_tmp = Wire(nWid)
  s.des_tmp = Wire(nWid)
  # handshake helper flags
  s.go = Wire(1)
  s.go_req = Wire(1)
  s.go_resp = Wire(1)
  s.go_mul = Wire(1)
  # remembers whether the memory request has already been sent
  s.reqsent = RegRst(1, 0)
def __init__(s, mapper_num=2, reducer_num=1):
  """Wordcount accelerator top level: a scheduler orchestrating arrays of
  mappers and reducers behind val/rdy top-level and memory interfaces."""

  # Interface
  s.wcreq   = InValRdyBundle(WordcountReqMsg())
  s.wcresp  = OutValRdyBundle(WordcountRespMsg())
  s.memreq  = OutValRdyBundle(MemReqMsg(8, 32, 32))
  s.memresp = InValRdyBundle(MemRespMsg(8, 32))

  # Framework Components
  s.map  = MapperPRTL[mapper_num]()
  s.red  = ReducerPRTL[reducer_num]()
  s.sche = SchedulerPRTL()

  # Scheduler <-> mapper request/response channels
  for idx in xrange(mapper_num):
    s.connect(s.sche.map_req[idx], s.map[idx].req)
    s.connect(s.sche.map_resp[idx], s.map[idx].resp)

  # Scheduler <-> reducer request/response channels
  for idx in xrange(reducer_num):
    s.connect(s.sche.red_req[idx], s.red[idx].req)
    s.connect(s.sche.red_resp[idx], s.red[idx].resp)

  # Scheduler <-> global memory and top-level ports
  s.connect(s.sche.gmem_req, s.memreq)
  s.connect(s.sche.gmem_resp, s.memresp)
  s.connect(s.wcreq, s.sche.in_)
  s.connect(s.wcresp, s.sche.out)
def __init__( s ):
  """Example model: reads a MemReqMsg input both by bit slices and by
  named fields every cycle, asserting the two views agree."""

  # MemReqMsg(addr_nbits, data_nbits) is a BitStruct datatype:
  #
  # +------+-----------+------+-----------+
  # | type |   addr    | len  |   data    |
  # +------+-----------+------+-----------+

  dtype = MemReqMsg( 32, 32 )
  s.in_ = InPort( dtype )

  @s.tick
  def logic():
    # BitStructs are subclasses of Bits, we can slice them
    addr, data = s.in_[34:66], s.in_[0:32]
    # ... but it's usually more convenient to use fields!
    addr, data = s.in_.addr, s.in_.data
    assert s.in_[34:66] == s.in_.addr
    assert s.in_[ 0:32] == s.in_.data
    print addr, data
def __init__(s, num_banks=0, size=256, opaque_nbits=8, addr_nbits=32, data_nbits=32, cacheline_nbits=128):
  """Pass-through ("null") cache: forwards requests straight to memory and
  responses straight back, widening/truncating the data field and
  translating write-init requests into plain writes."""

  # Banking
  idx_shamt = clog2(num_banks) if num_banks > 0 else 0

  #---------------------------------------------------------------------
  # Interface
  #---------------------------------------------------------------------

  # Proc <-> Cache
  s.cachereq = InValRdyBundle( MemReqMsg(opaque_nbits, addr_nbits, data_nbits))
  s.cacheresp = OutValRdyBundle(MemRespMsg(opaque_nbits, data_nbits))

  # Cache <-> Mem
  s.memreq = OutValRdyBundle( MemReqMsg(opaque_nbits, addr_nbits, cacheline_nbits))
  s.memresp = InValRdyBundle(MemRespMsg(opaque_nbits, cacheline_nbits))

  #---------------------------------------------------------------------
  # Control
  #---------------------------------------------------------------------

  # pass through val/rdy signals
  s.connect(s.cachereq.val, s.memreq.val)
  s.connect(s.cachereq.rdy, s.memreq.rdy)
  s.connect(s.memresp.val, s.cacheresp.val)
  s.connect(s.memresp.rdy, s.cacheresp.rdy)

  #---------------------------------------------------------------------
  # Datapath
  #---------------------------------------------------------------------

  @s.combinational
  def logic():

    # Pass through requests: just copy all of the fields over, except
    # we zero extend the data field.
    # len == 0 means a full 32-bit (4-byte) access on the proc side.
    if s.cachereq.msg.len == 0:
      len_ = 4
    else:
      len_ = s.cachereq.msg.len

    # Memory has no notion of write-init, so send a plain write.
    if s.cachereq.msg.type_ == MemReqMsg.TYPE_WRITE_INIT:
      s.memreq.msg.type_.value = MemReqMsg.TYPE_WRITE
    else:
      s.memreq.msg.type_.value = s.cachereq.msg.type_

    s.memreq.msg.opaque.value = s.cachereq.msg.opaque
    s.memreq.msg.addr.value = s.cachereq.msg.addr
    s.memreq.msg.len.value = len_
    s.memreq.msg.data.value = zext(s.cachereq.msg.data, 128)

    # Pass through responses: just copy all of the fields over, except
    # we truncate the data field.
    # len == 4 on the mem side maps back to the proc-side encoding of 0.
    len_ = s.memresp.msg.len
    if len_ == 4:
      len_ = 0

    s.cacheresp.msg.type_.value = s.memresp.msg.type_
    s.cacheresp.msg.opaque.value = s.memresp.msg.opaque
    s.cacheresp.msg.test.value = 0  # "miss"
    s.cacheresp.msg.len.value = len_
    s.cacheresp.msg.data.value = s.memresp.msg.data[0:32]
def req(type_, opaque, addr, len, data):
  """Build a 32-bit-data memory request message.

  type_ is a short string key looked up in the module-level req_type_dict;
  the remaining arguments are passed through to MemReqMsg.mk_msg.
  """
  msg_type = req_type_dict[type_]
  template = MemReqMsg(8, 32, 32)
  return template.mk_msg(msg_type, opaque, addr, len, data)
def __init__(s, mapper_num=2, reducer_num=1):
  """Wordcount scheduler: a five-state FSM that accepts configuration from
  the top level, streams words from global memory into a task queue, and
  dispatches them to idle mappers, forwarding mapper results to reducer 0.

  NOTE(review): several constructs below look suspect and should be
  confirmed against the simulator semantics:
    * s.reference/s.base/s.size are declared InPort but are written from
      combinational blocks;
    * s.init_count/s.input_count are driven from both the tick block and
      state_outputs (potential multiple-driver conflict);
    * mapper_done hardcodes map_resp[0]|map_resp[1], i.e. mapper_num == 2.
  """

  # Top Level Interface
  s.in_ = InValRdyBundle(WordcountReqMsg())
  s.out = OutValRdyBundle(WordcountRespMsg())
  s.reference = InPort(32)   # reference word to count
  s.base = InPort(32)        # base address of the input in global memory
  s.size = InPort(32)        # number of input words

  # Global Memory Interface
  s.gmem_req = OutValRdyBundle(MemReqMsg(8, 32, 32))
  s.gmem_resp = InValRdyBundle(MemRespMsg(8, 32))

  # Local Memory Interface
  s.lmem_req = OutValRdyBundle(MemReqMsg(8, 32, 32))
  s.lmem_resp = InValRdyBundle(MemRespMsg(8, 32))

  # Mapper Interface
  s.map_req = OutValRdyBundle[mapper_num](MapperReqMsg())
  s.map_resp = InValRdyBundle[mapper_num](MapperRespMsg())

  # Reducer Interface
  s.red_req = OutValRdyBundle[reducer_num](ReducerReqMsg())
  s.red_resp = InValRdyBundle[reducer_num](ReducerRespMsg())

  # Task Queue of words fetched from global memory
  s.task_queue = NormalQueue(2, Bits(32))

  # Idle Queue storing mapper ID
  s.idle_queue = NormalQueue(2, Bits(2))

  # States
  s.STATE_IDLE = 0    # waiting for top level to start
  s.STATE_SOURCE = 1  # handling test source: base, size, ref configuration
  s.STATE_INIT = 2    # assigning the reference word to each mapper
  s.STATE_START = 3   # steady-state scheduling
  s.STATE_END = 4     # all input words fetched from global memory

  s.state = RegRst(4, reset_value=s.STATE_IDLE)

  # Counters
  s.init_count = Wire(2)    # mappers initialized so far
  s.input_count = Wire(32)  # global-memory words requested so far

  @s.tick
  def counter():
    if (s.idle_queue.enq.val and s.init):
      s.init_count.next = s.init_count + 1
    if (s.gmem_req.val):
      s.input_count.next = s.input_count + 1

  # Signals
  s.go = Wire(1)           # go signal tells scheduler to start scheduling
  s.mapper_done = Wire(1)  # if one or more mapper is done and send resp
  s.init = Wire(1)         # init signal indicates scheduler at initial state
  s.end = Wire(1)          # end signal indicates all task are loaded
  s.done = Wire(1)         # done signal indicates everything is done
  s.num_task_queue = Wire(2)
  s.connect(s.task_queue.num_free_entries, s.num_task_queue)

  @s.combinational
  def logic():
    # NOTE(review): hardcodes two mappers regardless of mapper_num.
    s.mapper_done.value = s.map_resp[0].val | s.map_resp[1].val

  #---------------------------------------------------------------------
  # Assign Task to Mapper Combinational Logic
  #---------------------------------------------------------------------

  @s.combinational
  def mapper():
    # initialize mapper req and resp handshake signals
    for i in xrange(mapper_num):
      s.map_req[i].val.value = 0
    s.task_queue.deq.rdy.value = 0
    s.idle_queue.deq.rdy.value = 0

    if s.init:
      # During init, broadcast the reference word to mapper init_count
      # and record that mapper as idle.
      s.map_req[s.init_count].msg.data.value = s.reference
      s.map_req[s.init_count].msg.type_.value = 1
      s.map_req[s.init_count].val.value = 1
      s.idle_queue.enq.msg.value = s.init_count
      s.idle_queue.enq.val.value = 1
    else:
      # assign task to mapper if task queue is ready to dequeue,
      # idle queue is ready to dequeue and mapper is ready to take request
      if (s.task_queue.deq.val and s.idle_queue.deq.val and s.map_req[s.idle_queue.deq.msg].rdy):
        s.map_req[s.idle_queue.deq.msg].msg.data.value = s.task_queue.deq.msg[0:8]
        s.map_req[s.idle_queue.deq.msg].msg.type_.value = 0
        s.map_req[s.idle_queue.deq.msg].val.value = 1
        s.task_queue.deq.rdy.value = 1
        s.idle_queue.deq.rdy.value = 1

  #---------------------------------------------------------------------
  # Send Mapper Resp to Reducer Combinational Logic
  #---------------------------------------------------------------------

  @s.combinational
  def reducer():
    # initialize mapper and reducer handshake signals
    for i in xrange(mapper_num):
      s.map_resp[i].rdy.value = 0
    for i in xrange(reducer_num):
      s.red_req[i].val.value = 0
    #s.idle_queue.enq.val.value = 0

    # get the mapper response, assign the response to reducer
    if (s.mapper_done):
      # Check each mapper response, add it to idle queue, send its response
      # to Reducer, mark its response ready; handle only the first valid
      # response per cycle (break below).
      for i in xrange(mapper_num):
        if (s.map_resp[i].val):
          if ~s.init:
            if s.idle_queue.enq.rdy:
              s.idle_queue.enq.msg.value = i
              s.idle_queue.enq.val.value = 1
          if s.red_req[0].rdy:
            if s.end and s.num_task_queue == 2:
              # Last response: type 1 tells the reducer to finalize.
              s.red_req[0].msg.data.value = s.map_resp[i].msg.data
              s.red_req[0].msg.type_.value = 1
              s.red_req[0].val.value = 1
              s.done.value = 1
            else:
              s.red_req[0].msg.data.value = s.map_resp[i].msg.data
              s.red_req[0].msg.type_.value = 0
              s.red_req[0].val.value = 1
            s.map_resp[i].rdy.value = 1
          break

  #---------------------------------------------------------------------
  # Task State Transition Logic
  #---------------------------------------------------------------------

  @s.combinational
  def state_transitions():
    curr_state = s.state.out
    next_state = s.state.out

    if (curr_state == s.STATE_IDLE):
      if (s.in_.val):
        next_state = s.STATE_SOURCE
    if (curr_state == s.STATE_SOURCE):
      if (s.go):
        next_state = s.STATE_INIT
      elif (s.done and s.red_resp[0].val):
        next_state = s.STATE_IDLE
    if (curr_state == s.STATE_INIT):
      if (s.init_count == mapper_num - 1):
        next_state = s.STATE_START
    if (curr_state == s.STATE_START):
      if (s.input_count == s.size - 1):
        next_state = s.STATE_END
    if (curr_state == s.STATE_END):
      if (s.done):
        next_state = s.STATE_SOURCE

    s.state.in_.value = next_state

  #---------------------------------------------------------------------
  # Task State Output Logic
  #---------------------------------------------------------------------

  @s.combinational
  def state_outputs():
    current_state = s.state.out

    s.gmem_req.val.value = 0
    s.gmem_resp.rdy.value = 0
    s.in_.rdy.value = 0
    s.out.val.value = 0
    s.task_queue.enq.val.value = 0

    # In IDLE state
    if (current_state == s.STATE_IDLE):
      s.init_count.value = 0
      s.input_count.value = 0
      s.end.value = 0
      s.go.value = 0
      s.init.value = 0
      s.done.value = 0
      if s.in_.val:
        if (s.in_.msg.addr == 1):
          s.base.value = s.in_.msg.data
        s.in_.rdy.value = 1
        s.out.msg.type_.value = WordcountReqMsg.TYPE_WRITE
        s.out.msg.data.value = 0
        s.out.val.value = 1

    # In SOURCE state
    if (current_state == s.STATE_SOURCE):
      if (s.in_.val and s.out.rdy):
        if (s.in_.msg.type_ == WordcountReqMsg.TYPE_WRITE):
          # addr selects the configuration register being written
          if (s.in_.msg.addr == 0):
            s.go.value = 1
          elif (s.in_.msg.addr == 2):
            s.size.value = s.in_.msg.data
          elif (s.in_.msg.addr == 3):
            s.reference.value = s.in_.msg.data
          # Send xcel response message
          s.in_.rdy.value = 1
          s.out.msg.type_.value = WordcountReqMsg.TYPE_WRITE
          s.out.msg.data.value = 0
          s.out.val.value = 1
        elif (s.in_.msg.type_ == WordcountReqMsg.TYPE_READ):
          if (s.done and s.red_resp[0].val):
            s.out.msg.type_.value = WordcountReqMsg.TYPE_READ
            s.out.msg.data.value = s.red_resp[0].msg.data
            s.red_resp[0].rdy.value = 1
            s.in_.rdy.value = 1
            s.out.val.value = 1

    # In INIT state
    if (current_state == s.STATE_INIT):
      s.init.value = 1
      s.go.value = 0
      # at the last 2 cycle of init, send read req to global memory
      if s.init_count == mapper_num - 2:
        if s.gmem_req.rdy:
          s.gmem_req.msg.addr.value = s.base + (4 * s.input_count)
          s.gmem_req.msg.type_.value = TYPE_READ
          s.gmem_req.val.value = 1
      # at the last cycle of init, receive read resp to global memory,
      # put it in task queue, and send another read req to global memory
      if s.init_count == mapper_num - 1:
        if s.gmem_resp.val and s.gmem_req.rdy:
          s.task_queue.enq.msg.value = s.gmem_resp.msg
          s.task_queue.enq.val.value = 1
          s.gmem_resp.rdy.value = 1
          s.gmem_req.msg.addr.value = s.base + (4 * s.input_count)
          s.gmem_req.msg.type_.value = TYPE_READ
          s.gmem_req.val.value = 1

    # In START state: keep streaming words from global memory
    if (current_state == s.STATE_START):
      s.init.value = 0
      if s.gmem_resp.val and s.gmem_req.rdy:
        s.task_queue.enq.msg.value = s.gmem_resp.msg
        s.task_queue.enq.val.value = 1
        s.gmem_resp.rdy.value = 1
        s.gmem_req.msg.addr.value = s.base + (4 * s.input_count)
        s.gmem_req.msg.type_.value = TYPE_READ
        s.gmem_req.val.value = 1

    # In END state: drain the final response, no further requests
    if (current_state == s.STATE_END):
      if s.gmem_resp.val:
        s.task_queue.enq.msg.value = s.gmem_resp.msg
        s.task_queue.enq.val.value = 1
        s.gmem_resp.rdy.value = 1
        s.end.value = 1
def __init__(s, idx_shamt):
  """Direct-mapped blocking-cache datapath: request registers, tag and data
  SRAMs, evict/refill address muxing, and response word selection.

  idx_shamt: extra shift applied to the index when the cache is banked.
  """

  # Parameters
  o = 8      # Short name for opaque bitwidth
  abw = 32   # Short name for addr bitwidth
  dbw = 32   # Short name for data bitwidth
  clw = 128  # Short name for cacheline bitwidth
  nbl = 16   # Short name for number of cache blocks, 256*8/128 = 16
  idw = 4    # Short name for index width, clog2(16) = 4
  ofw = 4    # Short name for offset bit width, clog2(128/8) = 4
  nby = 16   # Short name for number of cache blocks per way, 16/1 = 16

  # In the lab, to simplify things, we always use all bits except for
  # the offset bits to represent the tag instead of storing the 24-bit
  # tag and concatenate everytime with the index bits and even the bank
  # bits to get the address of a cacheline
  tgw = 28   # Short name for tag bit width, 32-4 = 28

  #---------------------------------------------------------------------
  # Interface
  #---------------------------------------------------------------------

  # Cache request
  s.cachereq_msg = InPort(MemReqMsg(o, abw, dbw))

  # Cache response
  s.cacheresp_msg = OutPort(MemRespMsg(o, dbw))

  # Memory request
  s.memreq_msg = OutPort(MemReqMsg(o, abw, clw))

  # Memory response
  s.memresp_msg = InPort(MemRespMsg(o, clw))

  # Control signals (ctrl->dpath)
  s.cachereq_en = InPort(1)
  s.memresp_en = InPort(1)
  s.write_data_mux_sel = InPort(1)
  s.tag_array_ren = InPort(1)
  s.tag_array_wen = InPort(1)
  s.tag_array_en = InPort(1)
  s.data_array_ren = InPort(1)
  s.data_array_wen = InPort(1)
  s.data_array_en = InPort(1)
  s.data_array_wben = InPort(clw / 8)  # per-byte write enables
  s.read_data_reg_en = InPort(1)
  s.cacheresp_type = InPort(3)
  s.evict_addr_reg_en = InPort(1)
  s.memreq_addr_mux_sel = InPort(1)
  s.memreq_type = InPort(3)
  s.hit = InPort(2)
  s.read_word_mux_sel = InPort(3)

  # Status signals (dpath->Ctrl)
  s.cachereq_type = OutPort(3)
  s.cachereq_addr = OutPort(32)
  s.tag_match = OutPort(1)

  # Named slices of the packed request/response messages
  s.cachereq_msg_opaque = s.cachereq_msg[66:74]
  s.cachereq_msg_type = s.cachereq_msg[74:77]
  s.cachereq_msg_addr = s.cachereq_msg[34:66]
  s.cachereq_msg_data = s.cachereq_msg[0:32]
  s.memresp_msg_data = s.memresp_msg[0:128]

  #'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
  # Before Tag array
  #'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

  # Input registers capture the request fields when cachereq_en is high.
  s.cachereq_opaque_reg = m = RegEnRst(dtype=8)
  s.connect_pairs(
    m.en, s.cachereq_en,
    m.in_, s.cachereq_msg_opaque,
  )

  s.cachereq_type_reg = m = RegEnRst(dtype=3)
  s.connect_pairs(m.en, s.cachereq_en, m.in_, s.cachereq_msg_type, m.out, s.cachereq_type)

  s.cachereq_addr_reg = m = RegEnRst(dtype=32)
  s.connect_pairs(
    m.en, s.cachereq_en,
    m.in_, s.cachereq_msg_addr,
  )
  s.connect(s.cachereq_addr_reg.out, s.cachereq_addr)

  s.cachereq_data_reg = m = RegEnRst(dtype=32)
  s.connect_pairs(
    m.en, s.cachereq_en,
    m.in_, s.cachereq_msg_data,
  )

  # Index bits of the registered address, skipping any bank bits.
  s.idx = s.cachereq_addr_reg.out[4 + idx_shamt:8 + idx_shamt]

  #'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
  # Tag array
  #'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

  s.tag_array_read_data = Wire(28)
  s.tag_array = m = SRAMBitsComb_rst_1rw(num_entries=16, data_nbits=28)
  s.connect_pairs(m.wen, s.tag_array_en, m.addr, s.idx, m.wdata, s.cachereq_addr_reg.out[4:32], m.rdata, s.tag_array_read_data)

  #'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
  # After Tag array
  #'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

  # mkaddr1 rebuilds the refill address from the request; mkaddr2 rebuilds
  # the evict-victim address from the stored tag.
  s.mkaddr1_out = Wire(32)
  s.mkaddr2_out = Wire(32)
  s.mkaddr1 = m = mkaddr()
  s.connect_pairs(m.mkaddr_in, s.cachereq_addr_reg.out[4:32], m.mkaddr_out, s.mkaddr1_out)
  s.mkaddr2 = m = mkaddr()
  s.connect_pairs(m.mkaddr_in, s.tag_array_read_data, m.mkaddr_out, s.mkaddr2_out)

  s.evict_addr_reg = m = RegEnRst(dtype=32)
  s.connect_pairs(m.en, s.evict_addr_reg_en, m.in_, s.mkaddr1_out)

  # Memory request address: evict address (0) or rebuilt tag address (1).
  s.memreq_addr_mux = m = Mux(dtype=32, nports=2)
  s.connect_pairs(m.in_[0], s.evict_addr_reg.out, m.in_[1], s.mkaddr2_out, m.sel, s.memreq_addr_mux_sel)

  # Tag comparison -> tag_match status bit.
  s.comppart = m = comppart()
  s.connect_pairs(m.one_in, s.cachereq_addr_reg.out[4:32], m.two_in, s.tag_array_read_data, m.cmp_out, s.tag_match)

  #'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
  # Before Data array
  #'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

  s.memresp_data_reg = m = RegEnRst(dtype=128)
  s.connect_pairs(m.en, s.memresp_en, m.in_, s.memresp_msg_data)

  # repl replicates the 32-bit write data across the 128-bit cacheline.
  s.repl_out = Wire(128)
  s.repl = m = repl()
  s.connect_pairs(m.repl_in, s.cachereq_data_reg.out, m.repl_out, s.repl_out)

  # Data-array write source: replicated request data (0) or refill data (1).
  s.write_data_mux = m = Mux(dtype=128, nports=2)
  s.connect_pairs(m.in_[0], s.repl_out, m.in_[1], s.memresp_data_reg.out, m.sel, s.write_data_mux_sel)

  #'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
  # Data array
  #'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

  s.data_array_read_data = Wire(128)
  s.data_array = m = SRAMBytesComb_rst_1rw(num_entries=16, num_nbytes=16)
  s.connect_pairs(m.wen, s.data_array_en, m.addr, s.idx, m.wdata, s.write_data_mux.out, m.rdata, s.data_array_read_data, m.wben, s.data_array_wben)

  #'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
  # After Data array
  #'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

  s.read_data_reg = m = RegEnRst(dtype=128)
  s.connect_pairs(m.en, s.read_data_reg_en, m.in_, s.data_array_read_data)

  # Select the requested 32-bit word out of the cacheline; port 4 is zero.
  s.read_word_mux = m = Mux(dtype=32, nports=5)
  s.connect_pairs(m.in_[3], s.read_data_reg.out[96:128], m.in_[2], s.read_data_reg.out[64:96], m.in_[1], s.read_data_reg.out[32:64], m.in_[0], s.read_data_reg.out[0:32], m.in_[4], 0, m.sel, s.read_word_mux_sel)

  # Concat: assemble the outgoing memory request and cache response.
  s.connect_pairs(s.memreq_msg.type_, s.memreq_type, s.memreq_msg.opaque, 0, s.memreq_msg.addr, s.memreq_addr_mux.out, s.memreq_msg.len, 0, s.memreq_msg.data, s.read_data_reg.out)
  s.connect_pairs(s.cacheresp_msg.type_, s.cacheresp_type, s.cacheresp_msg.opaque, s.cachereq_opaque_reg.out, s.cacheresp_msg.test, s.hit, s.cacheresp_msg.len, 0, s.cacheresp_msg.data, s.read_word_mux.out)
def __init__(s, num_banks=0):
  """Blocking-cache (alternative design) top level: connects the external
  val/rdy interfaces to the ctrl/dpath submodules and defines the state
  abbreviations used by the line trace.

  num_banks: number of cache banks; 0 disables banking.
  """

  # Parameters
  # Shift amount used to skip over the bank-index bits in the address.
  idx_shamt = clog2(num_banks) if num_banks > 0 else 0
  size = 256             # 256 bytes
  opaque_nbits = 8       # 8-bit opaque field
  addr_nbits = 32        # 32-bit address
  data_nbits = 32        # 32-bit data access
  cacheline_nbits = 128  # 128-bit cacheline

  #---------------------------------------------------------------------
  # Interface
  #---------------------------------------------------------------------

  # Proc <-> Cache
  s.cachereq = InValRdyBundle( MemReqMsg(opaque_nbits, addr_nbits, data_nbits))
  s.cacheresp = OutValRdyBundle(MemRespMsg(opaque_nbits, data_nbits))

  # Cache <-> Mem
  s.memreq = OutValRdyBundle( MemReqMsg(opaque_nbits, addr_nbits, cacheline_nbits))
  s.memresp = InValRdyBundle(MemRespMsg(opaque_nbits, cacheline_nbits))

  s.ctrl = BlockingCacheAltCtrlPRTL(idx_shamt)
  s.dpath = BlockingCacheAltDpathPRTL(idx_shamt)

  # Ctrl: val/rdy handshakes go to the control unit
  s.connect_pairs(s.ctrl.cachereq_val, s.cachereq.val, s.ctrl.cachereq_rdy, s.cachereq.rdy, s.ctrl.cacheresp_val, s.cacheresp.val, s.ctrl.cacheresp_rdy, s.cacheresp.rdy, s.ctrl.memreq_val, s.memreq.val, s.ctrl.memreq_rdy, s.memreq.rdy, s.ctrl.memresp_val, s.memresp.val, s.ctrl.memresp_rdy, s.memresp.rdy)

  # Dpath: message payloads go to the datapath
  s.connect_pairs(s.dpath.cachereq_msg, s.cachereq.msg, s.dpath.cacheresp_msg, s.cacheresp.msg, s.dpath.memreq_msg, s.memreq.msg, s.dpath.memresp_msg, s.memresp.msg)

  # Ctrl <-> Dpath: matched-name ports are connected automatically
  s.connect_auto(s.ctrl, s.dpath)

  # State dictionary: short names for the line trace
  s.state_map = {
    s.ctrl.STATE_IDLE: 'I',
    s.ctrl.STATE_TAG_CHECK: 'TC',
    s.ctrl.STATE_INIT_DATA_ACCESS: 'IN',
    s.ctrl.STATE_READ_DATA_ACCESS: 'RD',
    s.ctrl.STATE_WRITE_DATA_ACCESS: 'WD',
    s.ctrl.STATE_EVICT_PREPARE: 'EP',
    s.ctrl.STATE_EVICT_REQUEST: 'ER',
    s.ctrl.STATE_EVICT_WAIT: 'EW',
    s.ctrl.STATE_REFILL_REQUEST: 'RR',
    s.ctrl.STATE_REFILL_WAIT: 'RW',
    s.ctrl.STATE_REFILL_UPDATE: 'RU',
    s.ctrl.STATE_WAIT: 'W'
  }
def __init__(s, mapper_num=30, reducer_num=10, k=3, nbits=6, train_size=600):
  """Top level of the digit-recognition (kNN) accelerator.

  Wires together ``mapper_num`` mappers, ``reducer_num`` reducers, one
  merger, one scheduler, and a per-digit register file that holds the
  training data.
  """

  TRAIN_DATA = train_size

  # Bits needed to hold a sum of k distances of nbits each:
  # ceil(log2((2**nbits - 1) * k)).
  sum_nbits = int(math.ceil(math.log((2**nbits - 1) * k, 2)))

  # Interface

  s.direq = InValRdyBundle(digitrecReqMsg())        # requests from host
  s.diresp = OutValRdyBundle(digitrecRespMsg())     # responses to host
  s.memreq = OutValRdyBundle(MemReqMsg(8, 32, 64))  # global memory request
  s.memresp = InValRdyBundle(MemRespMsg(8, 64))     # global memory response

  # Framework Components

  s.map = MapperPRTL[mapper_num]()
  # Each reducer merges results from mapper_num/reducer_num mappers.
  # NOTE(review): the literal 50 is an undocumented ReducerPRTL
  # parameter -- confirm its meaning against the Reducer definition.
  s.red = ReducerPRTL[reducer_num](mapper_num / reducer_num, nbits, k, 50)
  s.mer = MergerPRTL(reducer_num, sum_nbits)
  s.sche = SchedulerPRTL(mapper_num=mapper_num, reducer_num=reducer_num,
                         train_size=TRAIN_DATA)

  # One register file per digit class holds that digit's training data;
  # each has one read port per mapper assigned to the digit.
  s.train_data = m = RegisterFile[DIGIT](dtype=Bits(DATA_BITS),
                                         nregs=TRAIN_DATA,
                                         rd_ports=mapper_num / DIGIT,
                                         wr_ports=1,
                                         const_zero=False)

  # Connect Registerfile read port to Mapper.
  # NOTE(review): the literal 10 assumes DIGIT == 10 (mapper j*10+i is
  # the j-th mapper assigned to digit i) -- verify if DIGIT changes.
  for i in xrange(DIGIT):
    for j in xrange(mapper_num / DIGIT):
      s.connect(
        m[i].rd_data[j],
        s.map[j * 10 + i].in0,
      )

  # Connect Registerfile write port to Scheduler (training-data load)
  for i in xrange(DIGIT):
    s.connect_dict({
      m[i].wr_addr: s.sche.regf_addr[i],
      m[i].wr_data: s.sche.regf_data[i],
      m[i].wr_en: s.sche.regf_wren[i],
    })

  # Connect Registerfile rd port to Scheduler: the scheduler broadcasts
  # the same read addresses to all DIGIT register files.
  # NOTE(review): loop bound is mapper_num/reducer_num but the register
  # file has mapper_num/DIGIT read ports; these match only when
  # reducer_num == DIGIT (true for the defaults 30/10) -- confirm.
  for i in xrange(DIGIT):
    for j in xrange(mapper_num / reducer_num):
      s.connect(
        m[i].rd_addr[j],
        s.sche.regf_rdaddr[j],
      )

  # Connect Mapper test data port to Scheduler
  for i in xrange(mapper_num):
    s.connect(s.sche.map_req[i], s.map[i].in1)

  # Connect Mapper Output to Reducer
  # for 3 mapper : 1 reducer, mapper 0, 10, 20 connect to reducer 0, etc
  for i in xrange(reducer_num):
    for j in xrange(mapper_num / reducer_num):
      s.connect_pairs(
        s.map[i + 10 * j].out,
        s.red[i].in_[j],
      )

  # Connect rst signal from Scheduler to Reducer (broadcast)
  for i in xrange(reducer_num):
    s.connect_pairs(
      s.sche.red_rst,
      s.red[i].rst,
    )

  # Connect Reducer Output to Merger
  for i in xrange(reducer_num):
    s.connect_pairs(s.red[i].out, s.mer.in_[i])

  # Connect Merger output to Scheduler
  s.connect(s.mer.out, s.sche.merger_resp)

  # Connect global memory and top level to scheduler
  s.connect_pairs(
    s.sche.gmem_req, s.memreq,
    s.sche.gmem_resp, s.memresp,
    s.direq, s.sche.in_,
    s.diresp, s.sche.out,
  )
def __init__(s, mapper_num=10, reducer_num=1, train_size=600):
  """Scheduler FSM for the digit-recognition accelerator.

  Loads the training set into the register files, fetches test data
  from global memory, drives the mappers, and writes the merger's
  classification result back to memory.
  """

  TRAIN_DATA = train_size
  TRAIN_LOG = int(math.ceil(math.log(TRAIN_DATA, 2)))  # address bits for TRAIN_DATA entries

  # Import training data and store them into a flat Python list:
  # DIGIT files of TRAIN_DATA hex values each, concatenated so entry
  # i*TRAIN_DATA + n is the n-th sample of digit i.
  training_data = []
  for i in xrange(DIGIT):
    count = 0
    filename = 'data/training_set_' + str(i) + '.dat'
    with open(filename, 'r') as f:
      for L in f:
        if (count > TRAIN_DATA - 1):
          break
        training_data.append(int(L.replace(',\n', ''), 16))
        count = count + 1

  # Top Level Interface
  s.in_ = InValRdyBundle(digitrecReqMsg())
  s.out = OutValRdyBundle(digitrecRespMsg())
  # NOTE(review): base and size are declared as InPorts but are written
  # inside state_outputs below -- likely should be Wires/Regs; confirm.
  s.base = InPort(32)
  s.size = InPort(32)

  # Global Memory Interface
  s.gmem_req = OutValRdyBundle(MemReqMsg(8, 32, 64))
  s.gmem_resp = InValRdyBundle(MemRespMsg(8, 64))

  # Register File Interface (one write/read address set per digit)
  s.regf_addr = OutPort[DIGIT](TRAIN_LOG)
  s.regf_data = OutPort[DIGIT](DATA_BITS)
  s.regf_wren = OutPort[DIGIT](1)
  s.regf_rdaddr = OutPort[mapper_num / reducer_num](TRAIN_LOG)

  # Mapper Interface: broadcast test data to every mapper
  s.map_req = OutPort[mapper_num](DATA_BITS)

  # Reducer Reset
  s.red_rst = OutPort(1)

  # Merger Interface: final classified digit
  s.merger_resp = InPort(DIGIT_LOG)

  # States
  s.STATE_IDLE = 0    # Idle state, scheduler waiting for top level to start
  s.STATE_SOURCE = 1  # Source state, handling Test Source: base, size, ref info
  s.STATE_INIT = 2    # Init state, scheduler assigns input info to each Mapper
  s.STATE_START = 3   # Start state, scheduler gets test data, distributes and sorts
  s.STATE_WRITE = 4   # Write state, scheduler writes merger data to memory
  s.STATE_END = 5     # End state, scheduler has processed all tasks and is done

  # 4-bit state register (6 states; one spare bit)
  s.state = RegRst(4, reset_value=s.STATE_IDLE)

  # Counters
  s.input_count = Wire(TEST_LOG)      # test inputs fetched from memory
  s.result_count = Wire(TEST_LOG)     # results written back to memory
  s.train_count_rd = Wire(TRAIN_LOG)  # training entries streamed to mappers
  s.train_count_wr = Wire(32)         # training entries written to regfiles
  s.train_data_wr = Wire(1)           # enable: writing training data
  s.train_data_rd = Wire(1)           # enable: streaming training data

  # Logic to Increment Counters.
  # NOTE(review): input_count/train_count_rd/train_count_wr are driven
  # with .next here and with .value in state_outputs below -- two
  # drivers for the same signal; confirm this simulates as intended.
  @s.tick
  def counter():
    if (s.gmem_req.val and s.gmem_req.msg.type_ == TYPE_READ):
      s.input_count.next = s.input_count + 1
    if (s.gmem_req.val and s.gmem_req.msg.type_ == TYPE_WRITE):
      s.result_count.next = s.result_count + 1
    if s.rst:
      s.train_count_rd.next = 0
    elif s.train_data_rd:
      # mapper_num/DIGIT entries are consumed per cycle
      s.train_count_rd.next = s.train_count_rd + (mapper_num / DIGIT)
    if (s.train_data_wr):
      s.train_count_wr.next = s.train_count_wr + 1

  # Signals (declared after the tick block above; that is fine because
  # the tick body only reads them at simulation time)
  s.go = Wire(1)    # go signal tells scheduler to start scheduling
  s.done = Wire(1)  # done signal indicates everything is done
  s.rst = Wire(1)   # reset train count every test data processed

  # Reference data
  s.reference = Reg(dtype=DATA_BITS)  # reference stores current test data

  #---------------------------------------------------------------------
  # Initialize Register File for Training data
  #---------------------------------------------------------------------
  # While train_data_wr is high, write one training sample per digit per
  # cycle at address train_count_wr.

  @s.combinational
  def traindata():
    if s.train_data_wr:
      for i in xrange(DIGIT):
        s.regf_addr[i].value = s.train_count_wr
        s.regf_data[i].value = training_data[i * TRAIN_DATA + s.train_count_wr]
        s.regf_wren[i].value = 1
    else:
      for i in xrange(DIGIT):
        s.regf_wren[i].value = 0

  #---------------------------------------------------------------------
  # Assign Task to Mapper Combinational Logic
  #---------------------------------------------------------------------
  # Broadcast the test sample to every mapper and issue consecutive
  # regfile read addresses. NOTE(review): the literal 10 assumes
  # DIGIT == 10 -- verify if DIGIT changes.

  @s.combinational
  def mapper():
    # broadcast train data to mapper
    for i in xrange(DIGIT):
      for j in xrange(mapper_num / DIGIT):
        if (s.train_data_rd):
          s.map_req[j * 10 + i].value = s.reference.out
          s.regf_rdaddr[j].value = s.train_count_rd + j

  #---------------------------------------------------------------------
  # Task State Transition Logic
  #---------------------------------------------------------------------
  # IDLE -> SOURCE on host request; SOURCE -> INIT on go (or back to
  # IDLE when done); INIT -> START once the training set is loaded;
  # START -> WRITE after a full pass over the training data; WRITE ->
  # END when all inputs are processed, else back to START for the next
  # test sample; END -> SOURCE once the final memory response arrives.

  @s.combinational
  def state_transitions():

    curr_state = s.state.out
    next_state = s.state.out

    if (curr_state == s.STATE_IDLE):
      if (s.in_.val):
        next_state = s.STATE_SOURCE

    if (curr_state == s.STATE_SOURCE):
      if (s.go):
        next_state = s.STATE_INIT
      elif (s.done):
        next_state = s.STATE_IDLE

    if (curr_state == s.STATE_INIT):
      if (s.train_count_wr == TRAIN_DATA - 1):
        next_state = s.STATE_START

    if (curr_state == s.STATE_START):
      if (s.train_count_rd == TRAIN_DATA - (mapper_num / DIGIT)):
        next_state = s.STATE_WRITE

    if (curr_state == s.STATE_WRITE):
      if (s.input_count == s.size):
        next_state = s.STATE_END
      else:
        next_state = s.STATE_START

    if (curr_state == s.STATE_END):
      if s.gmem_resp.val:
        next_state = s.STATE_SOURCE

    s.state.in_.value = next_state

  #---------------------------------------------------------------------
  # Task State Output Logic
  #---------------------------------------------------------------------

  @s.combinational
  def state_outputs():

    current_state = s.state.out

    # Default outputs; overridden per state below.
    s.gmem_req.val.value = 0
    s.gmem_resp.rdy.value = 0
    s.in_.rdy.value = 0
    s.out.val.value = 0

    # In IDLE state: clear all counters and flags.
    # NOTE(review): s.reference is a Reg; writing s.reference.value
    # (rather than s.reference.in_.value) bypasses the register input --
    # confirm this is intended.
    if (current_state == s.STATE_IDLE):
      s.input_count.value = 0
      s.train_count_rd.value = 0
      s.train_count_wr.value = 0
      s.reference.value = 0
      s.go.value = 0
      s.train_data_rd.value = 0
      s.train_data_wr.value = 0
      s.done.value = 0
      s.rst.value = 0
      s.red_rst.value = 0

    # In SOURCE state: decode host configuration writes and the final
    # "done?" read.
    if (current_state == s.STATE_SOURCE):
      if (s.in_.val and s.out.rdy):
        if (s.in_.msg.type_ == digitrecReqMsg.TYPE_WRITE):
          if (s.in_.msg.addr == 0):    # start computing
            s.go.value = 1
          elif (s.in_.msg.addr == 1):  # base address
            s.base.value = s.in_.msg.data
          elif (s.in_.msg.addr == 2):  # size
            s.size.value = s.in_.msg.data
          # Send xcel response message
          s.in_.rdy.value = 1
          s.out.msg.type_.value = digitrecReqMsg.TYPE_WRITE
          s.out.msg.data.value = 0
          s.out.val.value = 1
        elif (s.in_.msg.type_ == digitrecReqMsg.TYPE_READ):
          # the computing is done, send response message
          if (s.done):
            s.out.msg.type_.value = digitrecReqMsg.TYPE_READ
            s.out.msg.data.value = 1
            s.in_.rdy.value = 1
            s.out.val.value = 1

    # In INIT state: stream the training set into the regfiles; at the
    # end, issue the first test-data read to global memory.
    if (current_state == s.STATE_INIT):
      s.train_data_wr.value = 1
      s.go.value = 0
      # at the end of init, send read req to global memory
      if s.train_count_wr == TRAIN_DATA - 1:
        if s.gmem_req.rdy:
          s.gmem_req.msg.addr.value = s.base + (8 * s.input_count)
          s.gmem_req.msg.type_.value = TYPE_READ
          s.gmem_req.val.value = 1
          s.red_rst.value = 1

    # In START state: stream training data to the mappers while handling
    # memory responses.
    if (current_state == s.STATE_START):
      s.train_data_wr.value = 0
      s.train_data_rd.value = 1
      s.rst.value = 0
      s.red_rst.value = 0
      if s.gmem_resp.val:
        # if response type is read, stores test data to reference, hold
        # response val until everything is done (set in WRITE state)
        if s.gmem_resp.msg.type_ == TYPE_READ:
          s.gmem_resp.rdy.value = 1
          s.reference.in_.value = s.gmem_resp.msg.data
        else:
          # if response type is write, set response rdy, send another
          # req to read the next test data
          s.gmem_resp.rdy.value = 1
          s.gmem_req.msg.addr.value = s.base + (8 * s.input_count)
          s.gmem_req.msg.type_.value = TYPE_READ
          s.gmem_req.val.value = 1
          s.red_rst.value = 1
          s.train_data_rd.value = 0

    # In WRITE state: one test sample fully processed -- write the
    # merger's result to memory at the fixed result region 0x2000.
    if (current_state == s.STATE_WRITE):
      s.train_data_rd.value = 0
      # one test data done processed, write result from merger to memory
      if (s.gmem_req.rdy):
        s.gmem_req.msg.addr.value = 0x2000 + (8 * s.result_count)
        s.gmem_req.msg.data.value = s.merger_resp
        s.gmem_req.msg.type_.value = TYPE_WRITE
        s.gmem_req.val.value = 1
        s.rst.value = 1

    # In END state: drain the final write response, then signal done.
    if (current_state == s.STATE_END):
      if s.gmem_resp.val:
        s.gmem_resp.rdy.value = 1
        s.done.value = 1
def __init__(s):
  """Quad-core data-cache subsystem.

  Connects four processor ports through a request network to four
  banked cache instances, and funnels their refill traffic through a
  memory network onto a single main-memory port. Also exports per-bank
  miss/access pulse vectors for statistics.
  """

  # Parameters (icache_nbytes/dcache_nbytes/nopaque_nbits are currently
  # unused here; the cache banks use their own defaults)
  num_cores = 4
  nopaque_nbits = 8
  icache_nbytes = 256
  dcache_nbytes = 256
  mopaque_nbits = 8
  addr_nbits = 32
  data_nbits = 32
  cacheline_nbits = 128

  #---------------------------------------------------------------------
  # Interface
  #---------------------------------------------------------------------

  # One request/response port per core
  s.procreq = InValRdyBundle[num_cores](MemReqMsg(
      mopaque_nbits, addr_nbits, data_nbits))
  s.procresp = OutValRdyBundle[num_cores](MemRespMsg(
      mopaque_nbits, data_nbits))

  # Single cacheline-wide port to main memory
  s.mainmemreq = OutValRdyBundle(
      MemReqMsg(mopaque_nbits, addr_nbits, cacheline_nbits))
  s.mainmemresp = InValRdyBundle(
      MemRespMsg(mopaque_nbits, cacheline_nbits))

  # bring these statistics to upper level since the interfaces of this
  # module is hooked up to the networks, not the cache banks
  s.dcache_miss = OutPort(4)    # per-bank miss pulse (bit i = bank i)
  s.dcache_access = OutPort(4)  # per-bank accepted-request pulse

  #---------------------------------------------------------------------
  # Components
  #---------------------------------------------------------------------

  s.proc_dcache_net = CacheNetRTL()
  s.dcache = BlockingCacheAltRTL[num_cores](num_banks=num_cores)
  s.dcache_refill_net = MemNetRTL()

  #---------------------------------------------------------------------
  # Connections
  #---------------------------------------------------------------------

  for i in xrange(num_cores):

    # proc <-> proc_dcache_net
    s.connect(s.procreq[i], s.proc_dcache_net.procreq[i])
    s.connect(s.procresp[i], s.proc_dcache_net.procresp[i])

    # proc_dcache_net <-> dcache bank
    s.connect(s.proc_dcache_net.cachereq[i], s.dcache[i].cachereq)
    s.connect(s.proc_dcache_net.cacheresp[i], s.dcache[i].cacheresp)

    # dcache bank <-> dcache_refill_net
    s.connect(s.dcache[i].memreq, s.dcache_refill_net.memreq[i])
    s.connect(s.dcache[i].memresp, s.dcache_refill_net.memresp[i])

  # dcache_refill_net <-> dmemport (single merged memory port)
  s.connect(s.dcache_refill_net.mainmemreq[0], s.mainmemreq)
  s.connect(s.dcache_refill_net.mainmemresp[0], s.mainmemresp)

  # Statistics: a miss is a completed response (val & rdy) whose test
  # bit 0 (the hit flag) is clear; an access is an accepted request.
  # concat() places bank 3 in the MSB and bank 0 in the LSB.
  @s.combinational
  def collect_cache_statistics():
    s.dcache_miss.value = concat(
        s.dcache[3].cacheresp.rdy & s.dcache[3].cacheresp.val & \
        (~s.dcache[3].cacheresp.msg.test[0]),
        s.dcache[2].cacheresp.rdy & s.dcache[2].cacheresp.val & \
        (~s.dcache[2].cacheresp.msg.test[0]),
        s.dcache[1].cacheresp.rdy & s.dcache[1].cacheresp.val & \
        (~s.dcache[1].cacheresp.msg.test[0]),
        s.dcache[0].cacheresp.rdy & s.dcache[0].cacheresp.val & \
        (~s.dcache[0].cacheresp.msg.test[0]),
    )
    s.dcache_access.value = concat(
        s.dcache[3].cachereq.rdy & s.dcache[3].cachereq.val,
        s.dcache[2].cachereq.rdy & s.dcache[2].cachereq.val,
        s.dcache[1].cachereq.rdy & s.dcache[1].cachereq.val,
        s.dcache[0].cachereq.rdy & s.dcache[0].cachereq.val)
def __init__(s, reset_vector=0, test_en=True):
  """Functional-level processor with accelerator support.

  Builds the mngr/imem/dmem/xcel interfaces, wraps the memory ports in
  byte-level proxies and the message ports in queue adapters, then
  constructs the ISA semantics object and patches in mtx/mfx handlers
  that talk to the accelerator through those queues.
  """

  s.reset_vector = reset_vector
  s.test_en = test_en

  # Proc/Mngr Interface
  s.mngr2proc = InValRdyBundle(32)
  s.proc2mngr = OutValRdyBundle(32)

  # Instruction Memory Request/Response Interface
  s.imemreq = OutValRdyBundle(MemReqMsg(32, 32))
  s.imemresp = InValRdyBundle(MemRespMsg(32))

  # Data Memory Request/Response Interface
  s.dmemreq = OutValRdyBundle(MemReqMsg(32, 32))
  s.dmemresp = InValRdyBundle(MemRespMsg(32))

  # Accelerator Interface
  s.xcelreq = OutValRdyBundle(XcelReqMsg())
  s.xcelresp = InValRdyBundle(XcelRespMsg())

  # Byte-level memory proxies over the val/rdy ports
  s.imem = BytesMemPortAdapter(s.imemreq, s.imemresp)
  s.dmem = BytesMemPortAdapter(s.dmemreq, s.dmemresp)

  # Queue adapters for mngr and accelerator traffic
  s.mngr2proc_q = InValRdyQueueAdapter(s.mngr2proc)
  s.proc2mngr_q = OutValRdyQueueAdapter(s.proc2mngr)
  s.xcelreq_q = OutValRdyQueueAdapter(s.xcelreq)
  s.xcelresp_q = InValRdyQueueAdapter(s.xcelresp)

  # Extra Interfaces
  s.go = InPort(1)
  s.status = OutPort(32)
  s.stats_en = OutPort(1)
  s.num_insts = OutPort(32)

  # ISA semantics object driven through the proxies/adapters above
  s.isa = PisaSemantics(s.dmem, s.mngr2proc_q, s.proc2mngr_q)

  # "Monkey patch" the mtx/mfx execution functions so they interact
  # with the accelerator queue adapters instead of the defaults.

  def execute_mtx(s_, inst):
    # Write rt's value into accelerator register rs; drop the ack.
    s.xcelreq_q.append(XcelReqMsg().mk_wr(inst.rs, s_.R[inst.rt]))
    s.xcelresp_q.popleft()
    s_.PC += 4

  def execute_mfx(s_, inst):
    # Read accelerator register rs into rt.
    s.xcelreq_q.append(XcelReqMsg().mk_rd(inst.rs))
    resp = s.xcelresp_q.popleft()
    s_.R[inst.rt] = resp.data
    s_.PC += 4

  s.isa.execute_mtx = s.isa.execute_dispatch['mtx'] = execute_mtx
  s.isa.execute_mfx = s.isa.execute_dispatch['mfx'] = execute_mfx

  # Reset architectural state
  s.reset_proc()
def __init__( s, reset_vector=0, test_en=True ):
  """Cycle-level processor model.

  Declares the external interfaces, wraps every message port in a queue
  adapter, and initializes the pipeline queues and simulator-side
  bookkeeping state used by the tick logic (defined elsewhere).
  """

  s.reset_vector = reset_vector
  s.test_en      = test_en

  # Proc/Mngr Interface

  s.mngr2proc = InValRdyBundle( 32 )
  s.proc2mngr = OutValRdyBundle( 32 )

  # Instruction Memory Request/Response Interface

  s.imemreq   = OutValRdyBundle ( MemReqMsg(32,32) )
  s.imemresp  = InValRdyBundle  ( MemRespMsg(32)   )

  # Data Memory Request/Response Interface

  s.dmemreq   = OutValRdyBundle ( MemReqMsg(32,32) )
  s.dmemresp  = InValRdyBundle  ( MemRespMsg(32)   )

  # Accelerator Interface

  s.xcelreq   = OutValRdyBundle( XcelReqMsg()  )
  s.xcelresp  = InValRdyBundle ( XcelRespMsg() )

  # Extra Interface

  s.go        = InPort  ( 1 )
  s.status    = OutPort ( 32 )
  s.stats_en  = OutPort ( 1 )
  s.num_insts = OutPort ( 32 )

  # Queue Adapters (one per message port)

  s.mngr2proc_q = InValRdyQueueAdapter  ( s.mngr2proc )
  s.proc2mngr_q = OutValRdyQueueAdapter ( s.proc2mngr )

  s.imemreq_q   = OutValRdyQueueAdapter ( s.imemreq   )
  s.imemresp_q  = InValRdyQueueAdapter  ( s.imemresp  )

  s.dmemreq_q   = OutValRdyQueueAdapter ( s.dmemreq   )
  s.dmemresp_q  = InValRdyQueueAdapter  ( s.dmemresp  )

  s.xcelreq_q   = OutValRdyQueueAdapter ( s.xcelreq   )
  s.xcelresp_q  = InValRdyQueueAdapter  ( s.xcelresp  )

  # Helpers to make memory read/write requests

  mem_ifc_types = MemMsg(32,32)
  s.mk_rd = mem_ifc_types.req.mk_rd
  s.mk_wr = mem_ifc_types.req.mk_wr

  # Pipeline queues between F->X and X->W stages (depth 4)

  s.pc_queue_FX   = Queue(4)
  s.inst_queue_XW = Queue(4)
  s.wb_queue_XW   = Queue(4)

  # Architectural register file
  s.R = ParcProcCL.RegisterFile()

  # Per-stage stall bookkeeping (simulator-side state, not hardware);
  # stall_type_* hold a short label for line tracing
  s.stall_X      = False
  s.stall_W      = False
  s.stall_type_X = " "
  s.stall_type_W = " "

  # Writeback kind encodings carried through wb_queue_XW
  s.WB_NONE  = 0
  s.WB_REGWR = 1

  # Cycles remaining before the next instruction fetch can issue
  s.ifetch_wait = 0

  # Reset

  s.reset_proc()
def __init__(s, idx_shamt):
  """Datapath of the 2-way set-associative blocking cache.

  idx_shamt shifts the index field up by the number of bank-select bits
  when the cache is banked.
  """

  # Parameters

  o = 8     # Short name for opaque bitwidth
  abw = 32  # Short name for addr bitwidth
  dbw = 32  # Short name for data bitwidth
  clw = 128 # Short name for cacheline bitwidth
  nbl = 16  # Short name for number of cache blocks, 256*8/128 = 16
  idw = 3   # Short name for index width, clog2(16)-1 = 3 (-1 for 2-way)
  ofw = 4   # Short name for offset bit width, clog2(128/8) = 4
  # NOTE(review): for a 2-way cache blocks per way would be 16/2 = 8,
  # but nby is set to 16 (and is unused below) -- confirm intent.
  nby = 16  # Short name for number of cache blocks per way, 16/1 = 16

  # In the lab, to simplify things, we always use all bits except for
  # the offset bits to represent the tag instead of storing the 25-bit
  # tag and concatenate everytime with the index bits and even the bank
  # bits to get the address of a cacheline
  tgw = 28  # Short name for tag bit width, 32-4 = 28

  #---------------------------------------------------------------------
  # Interface
  #---------------------------------------------------------------------

  # Cache request
  s.cachereq_msg = InPort(MemReqMsg(o, abw, dbw))
  # Cache response
  s.cacheresp_msg = OutPort(MemRespMsg(o, dbw))
  # Memory request
  s.memreq_msg = OutPort(MemReqMsg(o, abw, clw))
  # Memory response
  s.memresp_msg = InPort(MemRespMsg(o, clw))

  # From Ctrl

  s.cachereq_en = InPort(1)         # latch incoming cache request
  s.memresp_en = InPort(1)          # latch refill data from memory
  s.read_data_reg_en = InPort(1)    # latch data-array read output
  s.evict_addr_reg_en = InPort(1)   # latch evict address

  # We'll have split tag arrays and unified data array
  s.tag_array_wen0 = InPort(1)
  s.tag_array_wen1 = InPort(1)
  s.data_array_wen = InPort(1)
  s.data_array_wben = InPort(clw / 8)   # per-byte write enables

  s.write_data_mux_sel = InPort(1)   # 0: replicated proc data, 1: refill
  s.memreq_addr_mux_sel = InPort(1)  # 0: evict addr, 1: refill addr
  s.read_word_mux_sel = InPort(3)    # selects word of cacheline (or 0)
  s.hit = InPort(2)                  # forwarded into cacheresp test field
  s.cacheresp_type = InPort(3)
  s.memreq_type = InPort(3)
  s.way_mux_sel = InPort(1)          # which tag array to read from?
  s.victim = InPort(1)               # Victim for evict and refill path
  s.victim_mux_sel = InPort(1)       # Evict/refill or RD/WD?
  s.tag_match1_reg_en = InPort(1)    # When to register tag_match1?

  # To Ctrl

  s.cachereq_type = OutPort(3)
  s.cachereq_addr = OutPort(32)
  s.tag_match0 = OutPort(1)
  s.tag_match1 = OutPort(1)
  s.which_way = OutPort(1)

  #---------------------------------------------------------------------
  # Componenet definitions
  #---------------------------------------------------------------------

  # Registers latching the fields of the incoming cache request

  s.cachereq_opaque_reg = m = RegEnRst(dtype=8)
  s.connect_pairs(m.en, s.cachereq_en,
                  m.in_, s.cachereq_msg.opaque,
                  m.out, s.cacheresp_msg.opaque)  # opaque passes straight through

  s.cachereq_type_reg = m = RegEnRst(dtype=3)
  s.connect_pairs(m.en, s.cachereq_en,
                  m.in_, s.cachereq_msg.type_,
                  m.out, s.cachereq_type)

  s.cachereq_addr_reg = m = RegEnRst(dtype=32)
  s.connect_pairs(m.en, s.cachereq_en,
                  m.in_, s.cachereq_msg.addr,
                  m.out, s.cachereq_addr)

  s.cachereq_data_reg = m = RegEnRst(dtype=32)
  s.connect_pairs(m.en, s.cachereq_en,
                  m.in_, s.cachereq_msg.data)

  # Refill data from memory

  s.memresp_data_reg = m = RegEnRst(dtype=128)
  s.connect_pairs(m.en, s.memresp_en,
                  m.in_, s.memresp_msg.data)

  # Evict address register; its in_ is driven by evict_mkaddr below

  s.evict_addr_reg = m = RegEnRst(dtype=32)
  s.connect_pairs(m.en, s.evict_addr_reg_en)

  # Split tag arrays, one per way; 8 entries each, addressed by the
  # 3-bit index (shifted past the bank bits). The stored "tag" is the
  # full addr[4:32], i.e. everything above the offset.

  s.tag_array0 = m = SRAMBitsComb_rst_1rw(num_entries=8, data_nbits=28)
  s.connect_pairs(m.wen, s.tag_array_wen0,
                  m.addr, s.cachereq_addr_reg.out[4 + idx_shamt:7 + idx_shamt],
                  m.wdata, s.cachereq_addr_reg.out[4:32])

  s.tag_array1 = m = SRAMBitsComb_rst_1rw(num_entries=8, data_nbits=28)
  s.connect_pairs(m.wen, s.tag_array_wen1,
                  m.addr, s.cachereq_addr_reg.out[4 + idx_shamt:7 + idx_shamt],
                  m.wdata, s.cachereq_addr_reg.out[4:32])

  # Unified data array: 16 lines of 16 bytes; addr bits [0:3] are the
  # set index, addr bit [3:4] (driven by victim_mux below) selects the way

  s.data_array = m = SRAMBytesComb_rst_1rw(num_entries=16, num_nbytes=16)
  s.connect_pairs(m.wen, s.data_array_wen,
                  m.wben, s.data_array_wben,
                  m.addr[0:3], s.cachereq_addr_reg.out[4 + idx_shamt:7 + idx_shamt])

  s.read_data_reg = m = RegEnRst(dtype=128)
  s.connect_pairs(m.en, s.read_data_reg_en,
                  m.in_, s.data_array.rdata)

  # Replicate the 32-bit write data across the 128-bit cacheline

  s.cachereq_data_repl = m = Repl(in_dtype=32, factor=4)
  s.connect_pairs(m.in_, s.cachereq_data_reg.out)

  # Select data-array write source: proc write data or memory refill

  s.write_data_mux = m = Mux(dtype=128, nports=2)
  s.connect_pairs(m.in_[0], s.cachereq_data_repl.out,
                  m.in_[1], s.memresp_data_reg.out,
                  m.sel, s.write_data_mux_sel,
                  m.out, s.data_array.wdata)

  # Per-way tag comparison

  s.tag_comparator0 = m = EqComparator(nbits=28)
  s.connect_pairs(m.in0, s.cachereq_addr_reg.out[4:32],
                  m.in1, s.tag_array0.rdata,
                  m.out, s.tag_match0)

  s.tag_comparator1 = m = EqComparator(nbits=28)
  s.connect_pairs(m.in0, s.cachereq_addr_reg.out[4:32],
                  m.in1, s.tag_array1.rdata,
                  m.out, s.tag_match1)

  # Registered tag_match1 doubles as the "which way hit" indicator

  s.tag_match1_reg = m = RegEnRst(dtype=1)
  s.connect_pairs(m.in_, s.tag_comparator1.out,
                  m.en, s.tag_match1_reg_en,
                  m.out, s.which_way)

  # Way select for the data array: victim (evict/refill) or hit way (RD/WD)

  s.victim_mux = m = Mux(dtype=1, nports=2)
  s.connect_pairs(m.in_[0], s.victim,
                  m.in_[1], s.tag_match1_reg.out,
                  m.sel, s.victim_mux_sel,
                  m.out, s.data_array.addr[3:4])

  # Select the victim way's tag for the evict address

  s.way_mux = m = Mux(dtype=28, nports=2)
  s.connect_pairs(m.in_[0], s.tag_array0.rdata,
                  m.in_[1], s.tag_array1.rdata,
                  m.sel, s.way_mux_sel)

  # Rebuild 32-bit addresses from the 28-bit stored tags

  s.evict_mkaddr = m = MakeAddr(in_dtype=28, out_dtype=32)
  s.connect_pairs(m.in_, s.way_mux.out,
                  m.out, s.evict_addr_reg.in_)

  s.memreq_mkaddr = m = MakeAddr(in_dtype=28, out_dtype=32)
  s.connect_pairs(m.in_, s.cachereq_addr_reg.out[4:32])

  s.memreq_addr_mux = m = Mux(dtype=32, nports=2)
  s.connect_pairs(m.in_[0], s.evict_addr_reg.out,
                  m.in_[1], s.memreq_mkaddr.out,
                  m.sel, s.memreq_addr_mux_sel)

  # Select the requested 32-bit word out of the cacheline.
  # NOTE(review): in_[0] takes the MOST-significant word [96:128] while
  # another dpath in this file maps in_[0] to [0:32]; this only works if
  # the ctrl unit's read_word_mux_sel encoding is reversed to match --
  # verify against the ctrl.

  s.read_word_mux = m = Mux(dtype=32, nports=5)
  s.connect_pairs(m.in_[0], s.read_data_reg.out[96:128],
                  m.in_[1], s.read_data_reg.out[64:96],
                  m.in_[2], s.read_data_reg.out[32:64],
                  m.in_[3], s.read_data_reg.out[0:32],
                  m.in_[4], 0,
                  m.sel, s.read_word_mux_sel)

  #---------------------------------------------------------------------
  # Connect output interfaces and signals
  #---------------------------------------------------------------------

  # cacheresp_msg (opaque is connected at cachereq_opaque_reg above)
  s.connect_pairs(s.cacheresp_msg.type_, s.cacheresp_type,
                  s.cacheresp_msg.len, 0,
                  s.cacheresp_msg.test, s.hit,
                  s.cacheresp_msg.data, s.read_word_mux.out)

  # memreq_msg: evictions send the full registered cacheline
  s.connect_pairs(s.memreq_msg.opaque, 0,
                  s.memreq_msg.type_, s.memreq_type,
                  s.memreq_msg.len, 0,
                  s.memreq_msg.addr, s.memreq_addr_mux.out,
                  s.memreq_msg.data, s.read_data_reg.out)