def __init__(s, nlanes, nmul_stages, cop_addr_nbits=5, cop_data_nbits=32,
             mem_addr_nbits=32, mem_data_nbits=32):
    """Record the configuration and declare the CPU-side and per-lane ports.

    Args:
        nlanes: Number of lanes; one request bundle and one response
            bundle are created per lane.
        nmul_stages: Stored on the instance; not otherwise used here.
        cop_addr_nbits: Added to ``cop_data_nbits`` to size ``from_cpu``.
        cop_data_nbits: Added to ``cop_addr_nbits`` to size ``from_cpu``.
        mem_addr_nbits: Address width handed to ``MemReqParams``.
        mem_data_nbits: Data width handed to both memory parameter objects.
    """
    # Configuration
    s.nlanes = nlanes
    s.nmul_stages = nmul_stages
    s.cop_addr_nbits = cop_addr_nbits
    s.cop_data_nbits = cop_data_nbits
    s.memreq_params = mem_msgs.MemReqParams(mem_addr_nbits, mem_data_nbits)
    s.memresp_params = mem_msgs.MemRespParams(mem_data_nbits)

    # CPU-facing ports: one val/rdy input carrying address plus data bits,
    # and a single-bit output.
    cpu_msg_nbits = cop_addr_nbits + cop_data_nbits
    s.from_cpu = InValRdyBundle(cpu_msg_nbits)
    s.to_cpu = OutPort(1)

    # One memory request/response bundle pair per lane.
    s.lane_req = [
        OutValRdyBundle(s.memreq_params.nbits) for _ in range(s.nlanes)
    ]
    s.lane_resp = [
        InValRdyBundle(s.memresp_params.nbits) for _ in range(s.nlanes)
    ]
def __init__(s, cop_addr_nbits=5, cop_data_nbits=32, mem_addr_nbits=32,
             mem_data_nbits=32):
    """Declare the ports and build the internal functional model.

    Args:
        cop_addr_nbits: Stored as ``addr_nbits``; together with
            ``cop_data_nbits`` it sizes the ``from_cpu`` bundle.
        cop_data_nbits: Stored as ``data_nbits``.
        mem_addr_nbits: Address width handed to ``MemReqParams``.
        mem_data_nbits: Data width handed to both memory parameter objects.
    """
    # Configuration
    s.addr_nbits = cop_addr_nbits
    s.data_nbits = cop_data_nbits
    s.mreq_p = mem_msgs.MemReqParams(mem_addr_nbits, mem_data_nbits)
    s.mresp_p = mem_msgs.MemRespParams(mem_data_nbits)

    # CPU-facing interface
    cpu_msg_nbits = s.addr_nbits + s.data_nbits
    s.from_cpu = InValRdyBundle(cpu_msg_nbits)
    s.to_cpu = OutMatrixVecBundle()

    # Memory request/response bundles
    s.memreq = InValRdyBundle(s.mreq_p.nbits)
    s.memresp = OutValRdyBundle(s.mresp_p.nbits)

    # The memory proxy wraps both bundles and is handed to the MatrixVec
    # functional model as its memory.
    s.mem = BytesMemPortProxy(s.mreq_p, s.mresp_p, s.memreq, s.memresp)
    s.xcel_mvmult = MatrixVec(s.mem)
def __init__(s, lane_id, mem_delay):
    """Instantiate a ``TestMemory`` and a ``MatrixVecLaneFL`` sharing params.

    Args:
        lane_id: Forwarded to ``MatrixVecLaneFL``.
        mem_delay: Forwarded to ``TestMemory``.
    """
    addr_nbits = data_nbits = 32
    req_params = mem_msgs.MemReqParams(addr_nbits, data_nbits)
    resp_params = mem_msgs.MemRespParams(data_nbits)

    # Third TestMemory argument is 1 here (presumably the port count --
    # confirm against TestMemory).
    s.mem = TestMemory(req_params, resp_params, 1, mem_delay)
    s.lane = MatrixVecLaneFL(lane_id, req_params, resp_params)
def single_port_mem_test_msgs():
    """Build the source/sink message lists for the one-port memory test.

    Each entry of the transaction table yields a write request (paired with
    a write response whose length and data fields are zero) followed by a
    read request for the same address and length (paired with a read
    response carrying the data that was just written).

    Returns:
        ``[src_msgs, sink_msgs]``; each is a list containing one message
        list per port.
    """
    # number of memory ports
    nports = 1

    # Create parameters
    memreq_params = mem_msgs.MemReqParams(32, 32)
    memresp_params = mem_msgs.MemRespParams(32)

    src_msgs = [[] for _ in range(nports)]
    sink_msgs = [[] for _ in range(nports)]

    mk_req = memreq_params.mk_req
    mk_resp = memresp_params.mk_resp

    # (address, len field, data) of every write-then-read-back pair
    transactions = [
        (0x00001000, 1, 0x000000ab),
        (0x00001001, 1, 0x000000cd),
        (0x00001000, 1, 0x000000ef),
        (0x00002000, 2, 0x0000abcd),
        (0x00002002, 2, 0x0000ef01),
        (0x00002000, 2, 0x00002345),
        (0x00004000, 0, 0xabcdef01),
        (0x00004004, 0, 0x23456789),
        (0x00004000, 0, 0xdeadbeef),
    ]

    port = 0
    for addr, len_, data in transactions:
        # Write the value ...
        wr_req = mk_req(memreq_params.type_write, addr, len_, data)
        wr_resp = mk_resp(memresp_params.type_write, 0, 0x00000000)
        src_msgs[port].append(wr_req)
        sink_msgs[port].append(wr_resp)

        # ... then read it back from the same location.
        rd_req = mk_req(memreq_params.type_read, addr, len_, 0x00000000)
        rd_resp = mk_resp(memresp_params.type_read, len_, data)
        src_msgs[port].append(rd_req)
        sink_msgs[port].append(rd_resp)

    return [src_msgs, sink_msgs]
def __init__(s, lane_id, nmul_stages, mem_delay):
    """Instantiate a ``TestMemory`` and a ``DotProduct`` lane.

    Args:
        lane_id: Accepted but not used in this constructor.
        nmul_stages: Accepted but not used in this constructor.
        mem_delay: Forwarded to ``TestMemory``.
    """
    # Memory parameters are kept on the instance as well as being handed
    # to the test memory.
    s.memreq_params = mem_msgs.MemReqParams(32, 32)
    s.memresp_params = mem_msgs.MemRespParams(32)
    s.mem = TestMemory(s.memreq_params, s.memresp_params, 1, mem_delay)

    # The lane is configured through interface-type objects rather than
    # the parameter objects above.
    s.lane = DotProduct(MemMsg(32, 32), CP2Msg(5, 32))
def __init__(s, lane_id, nmul_stages, mem_delay, test_verilog):
    """Instantiate a ``TestMemory`` and a ``MatrixVecLaneRTL`` lane.

    Args:
        lane_id: Forwarded to ``MatrixVecLaneRTL``.
        nmul_stages: Forwarded to ``MatrixVecLaneRTL``.
        mem_delay: Forwarded to ``TestMemory``.
        test_verilog: When truthy, the lane is wrapped in
            ``TranslationTool`` before being stored.
    """
    req_params = mem_msgs.MemReqParams(32, 32)
    resp_params = mem_msgs.MemRespParams(32)

    s.mem = TestMemory(req_params, resp_params, 1, mem_delay)

    lane = MatrixVecLaneRTL(lane_id, nmul_stages, req_params, resp_params)
    s.lane = TranslationTool(lane) if test_verilog else lane
def __init__(s, nlanes, mem_delay, src_delay, config_msgs):
    """Instantiate a source, a lane manager, FL lanes and a test memory.

    Args:
        nlanes: Number of lanes; must be positive. Sizes the lane manager,
            the lane list and the test memory.
        mem_delay: Forwarded to ``TestMemory``.
        src_delay: Forwarded to ``TestSource``.
        config_msgs: Messages handed to ``TestSource``.

    Raises:
        AssertionError: If ``nlanes`` is not greater than zero.
    """
    # Validate before nlanes is used to size any component.
    assert nlanes > 0

    memreq_params = mem_msgs.MemReqParams(32, 32)
    memresp_params = mem_msgs.MemRespParams(32)

    # Source messages are 5 + 32 bits wide.
    s.src = TestSource(5 + 32, config_msgs, src_delay)
    s.mgr = LaneManager(nlanes)
    s.lane = [
        MatrixVecLaneFL(lane_id, memreq_params, memresp_params)
        for lane_id in range(nlanes)
    ]
    s.mem = TestMemory(memreq_params, memresp_params, nlanes, mem_delay)

    s.nlanes = nlanes
def __init__(s, num_mvmults, mem_delay):
    """Instantiate a ``TestDriver``, a ``MatrixVecFL`` and a ``TestMemory``.

    Args:
        num_mvmults: Stored on the instance as ``num_mvmults``.
        mem_delay: Forwarded to ``TestMemory``.
    """
    # Memory message parameters
    req_params = mem_msgs.MemReqParams(32, 32)
    resp_params = mem_msgs.MemRespParams(32)

    # Bookkeeping: the stored target and a counter starting at zero.
    s.num_mvmults = num_mvmults
    s.mvmults_count = 0

    # Child models
    s.driver = TestDriver()
    s.mvmult = MatrixVecFL()
    s.mem = TestMemory(req_params, resp_params, 1, mem_delay)
def __init__(s, num_mvmults, src_msgs, src_delay, mem_delay):
    """Instantiate a ``TestSource``, a ``MatrixVecCL`` and a ``TestMemory``.

    Args:
        num_mvmults: Stored on the instance as ``num_mvmults``.
        src_msgs: Messages handed to ``TestSource``.
        src_delay: Forwarded to ``TestSource``.
        mem_delay: Forwarded to ``TestMemory``.
    """
    # Memory message parameters
    req_params = mem_msgs.MemReqParams(32, 32)
    resp_params = mem_msgs.MemRespParams(32)

    # Bookkeeping: the stored target and a counter starting at zero.
    s.num_mvmults = num_mvmults
    s.mvmults_count = 0

    # Child models; the source is 37 bits wide.
    src_nbits = 37
    s.src = TestSource(src_nbits, src_msgs, src_delay)
    s.mvmult = MatrixVecCL()
    s.mem = TestMemory(req_params, resp_params, 1, mem_delay)
def __init__(s, nlanes, nmul_stages, mem_delay, src_delay, config_msgs):
    """Instantiate a source, a ``MatrixVecFL`` model and a test memory.

    Args:
        nlanes: Number of lanes; must be positive. Forwarded to
            ``MatrixVecFL`` and ``TestMemory``.
        nmul_stages: Forwarded to ``MatrixVecFL``.
        mem_delay: Forwarded to ``TestMemory``.
        src_delay: Forwarded to ``TestSource``.
        config_msgs: Messages handed to ``TestSource``.

    Raises:
        AssertionError: If ``nlanes`` is not greater than zero.
    """
    # Validate before nlanes is used to size any component.
    assert nlanes > 0

    cop_addr_nbits = 5
    cop_data_nbits = 32
    mem_addr_nbits = 32
    mem_data_nbits = 32

    memreq_params = mem_msgs.MemReqParams(mem_addr_nbits, mem_data_nbits)
    memresp_params = mem_msgs.MemRespParams(mem_data_nbits)

    # The source width is the address width plus the data width.
    s.src = TestSource(cop_addr_nbits + cop_data_nbits, config_msgs,
                       src_delay)
    s.cop = MatrixVecFL(nlanes, nmul_stages, cop_addr_nbits, cop_data_nbits,
                        mem_addr_nbits, mem_data_nbits)
    s.mem = TestMemory(memreq_params, memresp_params, nlanes, mem_delay)

    s.nlanes = nlanes
def test_basic(dump_vcd):
    """Run the harness on six elements and check the element-wise sums.

    The test memory is preloaded with ``11 + i`` at ``0x0000 + 4*i`` and
    ``21 + i`` at ``0x0100 + 4*i``; after simulation the word at
    ``0x0200 + 4*i`` must equal their sum.

    Args:
        dump_vcd: Value assigned to the model's ``vcd_file`` attribute.
    """
    # Addresses and element count used both for the harness arguments and
    # for preloading/checking memory.
    src0_base = 0x0000
    src1_base = 0x0100
    dest_base = 0x0200
    nelems = 6

    # Create parameters
    memreq_params = mem_msgs.MemReqParams(32, 32)
    memresp_params = mem_msgs.MemRespParams(32)

    # Instantiate and elaborate the model
    model = TestHarness(memreq_params, memresp_params,
                        dest_base, src0_base, src1_base, nelems)
    model.vcd_file = dump_vcd
    model.elaborate()

    # Write test data into the test memory. ``range`` is used instead of
    # ``xrange`` so the loop runs on both Python 2 and Python 3.
    backing_store = model.mem.mem.mem
    for i in range(nelems):
        backing_store[src0_base + i * 4] = 11 + i
        backing_store[src1_base + i * 4] = 21 + i
        backing_store[dest_base + i * 4] = 0

    # Create a simulator using the simulation tool
    sim = SimulationTool(model)

    # Run the simulation
    print()
    sim.reset()
    while not model.done():
        sim.print_line_trace()
        sim.cycle()

    # Add a couple extra ticks so that the VCD dump is nicer
    sim.cycle()
    sim.cycle()
    sim.cycle()

    # Verify the output
    for i in range(nelems):
        assert model.mem.mem.mem[dest_base + i * 4] == 11 + i + 21 + i
def __init__(s, nlanes, nmul_stages, mem_delay, src_delay, config_msgs,
             test_verilog):
    """Instantiate a source, a lane manager, RTL lanes and a test memory.

    Args:
        nlanes: Number of lanes; must be positive. Sizes the lane manager,
            the lane list and the test memory.
        nmul_stages: Forwarded to every ``MatrixVecLaneRTL``.
        mem_delay: Forwarded to ``TestMemory``.
        src_delay: Forwarded to ``TestSource``.
        config_msgs: Messages handed to ``TestSource``.
        test_verilog: When truthy, the manager and every lane are replaced
            by their ``TranslationTool``-wrapped versions.

    Raises:
        AssertionError: If ``nlanes`` is not greater than zero.
    """
    # Validate before nlanes is used to size any component.
    assert nlanes > 0

    memreq_params = mem_msgs.MemReqParams(32, 32)
    memresp_params = mem_msgs.MemRespParams(32)

    # Source messages are 5 + 32 bits wide.
    s.src = TestSource(5 + 32, config_msgs, src_delay)
    s.mgr = LaneManager(nlanes)
    s.lane = [
        MatrixVecLaneRTL(lane_id, nmul_stages, memreq_params, memresp_params)
        for lane_id in range(nlanes)
    ]
    s.mem = TestMemory(memreq_params, memresp_params, nlanes, mem_delay)

    # Wrapping happens only after all components exist, as in the
    # original construction order.
    if test_verilog:
        s.mgr = TranslationTool(s.mgr)
        s.lane = [TranslationTool(lane) for lane in s.lane]

    s.nlanes = nlanes
def __init__(s, nlanes, nmul_stages, mem_delay, src_delay, config_msgs,
             test_verilog):
    """Instantiate a source, a ``MatrixVecCOP`` model and a test memory.

    Args:
        nlanes: Number of lanes; must be positive. Forwarded to
            ``MatrixVecCOP`` and ``TestMemory``.
        nmul_stages: Forwarded to ``MatrixVecCOP``.
        mem_delay: Forwarded to ``TestMemory``.
        src_delay: Forwarded to ``TestSource``.
        config_msgs: Messages handed to ``TestSource``.
        test_verilog: When truthy, ``s.cop`` is replaced by its
            ``TranslationTool``-wrapped version.

    Raises:
        AssertionError: If ``nlanes`` is not greater than zero.
    """
    # Validate before nlanes is used to size any component.
    assert nlanes > 0

    cop_addr_nbits = 5
    cop_data_nbits = 32
    mem_addr_nbits = 32
    mem_data_nbits = 32

    memreq_params = mem_msgs.MemReqParams(mem_addr_nbits, mem_data_nbits)
    memresp_params = mem_msgs.MemRespParams(mem_data_nbits)

    # The source width is the address width plus the data width.
    s.src = TestSource(cop_addr_nbits + cop_data_nbits, config_msgs,
                       src_delay)
    s.cop = MatrixVecCOP(nlanes, nmul_stages, cop_addr_nbits, cop_data_nbits,
                         mem_addr_nbits, mem_data_nbits)
    s.mem = TestMemory(memreq_params, memresp_params, nlanes, mem_delay)

    # Wrapping happens only after all components exist, as in the
    # original construction order.
    if test_verilog:
        s.cop = TranslationTool(s.cop)

    s.nlanes = nlanes
def run_mem_test(dump_vcd, src_delay, sink_delay, mem_delay, nports,
                 test_msgs):
    """Build a memory test harness and simulate it until it reports done.

    Args:
        dump_vcd: Value assigned to the model's ``vcd_file`` attribute.
        src_delay: Forwarded to ``TestHarness``.
        sink_delay: Forwarded to ``TestHarness``.
        mem_delay: Forwarded to ``TestHarness``.
        nports: Forwarded to ``TestHarness``.
        test_msgs: Indexable whose element 0 holds the source messages and
            element 1 the sink messages.
    """
    # Memory message parameters
    req_params = mem_msgs.MemReqParams(32, 32)
    resp_params = mem_msgs.MemRespParams(32)

    # Split the message pair into its source and sink halves.
    source_messages = test_msgs[0]
    sink_messages = test_msgs[1]

    # Build and elaborate the harness.
    model = TestHarness(req_params, resp_params, nports,
                        source_messages, sink_messages,
                        src_delay, sink_delay, mem_delay)
    model.vcd_file = dump_vcd
    model.elaborate()

    # Wrap the model in a simulator.
    sim = SimulationTool(model)

    # Simulate, printing a line trace before every cycle.
    print()
    sim.reset()
    while not model.done():
        sim.print_line_trace()
        sim.cycle()

    # Three extra ticks so that the VCD dump is nicer.
    for _ in range(3):
        sim.cycle()
def quad_port_mem_test_msgs():
    """Build the source/sink message lists for the four-port memory test.

    Each entry of the transaction table is issued on every port: first a
    write request per port (paired with a write response whose length and
    data fields are zero), then a read request per port for the same
    location (paired with a read response carrying the written data).

    Returns:
        ``[src_msgs, sink_msgs]``; each is a list containing one message
        list per port.
    """
    # number of memory ports
    nports = 4

    # Create parameters
    memreq_params = mem_msgs.MemReqParams(32, 32)
    memresp_params = mem_msgs.MemRespParams(32)

    src_msgs = [[] for _ in range(nports)]
    sink_msgs = [[] for _ in range(nports)]

    mk_req = memreq_params.mk_req
    mk_resp = memresp_params.mk_resp

    # Each port works in its own address window so that requests from
    # different ports never overlap.
    port_stride = 0x00010000

    # (port-0 address, len field, data) of every write-then-read-back pair
    transactions = [
        (0x00001000, 1, 0x000000ab),
        (0x00001001, 1, 0x000000cd),
        (0x00001000, 1, 0x000000ef),
        (0x00002000, 2, 0x0000abcd),
        (0x00002002, 2, 0x0000ef01),
        (0x00002000, 2, 0x00002345),
        (0x00004000, 0, 0xabcdef01),
        (0x00004004, 0, 0x23456789),
        (0x00004000, 0, 0xdeadbeef),
    ]

    for base_addr, len_, data in transactions:
        # Writes on ports 0..3 ...
        for port in range(nports):
            addr = base_addr + port * port_stride
            wr_req = mk_req(memreq_params.type_write, addr, len_, data)
            wr_resp = mk_resp(memresp_params.type_write, 0, 0x00000000)
            src_msgs[port].append(wr_req)
            sink_msgs[port].append(wr_resp)

        # ... followed by the read-backs on ports 0..3.
        for port in range(nports):
            addr = base_addr + port * port_stride
            rd_req = mk_req(memreq_params.type_read, addr, len_, 0x00000000)
            rd_resp = mk_resp(memresp_params.type_read, len_, data)
            src_msgs[port].append(rd_req)
            sink_msgs[port].append(rd_resp)

    return [src_msgs, sink_msgs]
def __init__(s, cop_addr_nbits=5, cop_data_nbits=32, mem_addr_nbits=32,
             mem_data_nbits=32):
    """Declare the ports and initialise the internal and pipeline state.

    Args:
        cop_addr_nbits: Stored as ``addr_nbits``; together with
            ``cop_data_nbits`` it sizes the ``from_cpu`` bundle.
        cop_data_nbits: Stored as ``data_nbits``.
        mem_addr_nbits: Address width handed to ``MemReqParams``.
        mem_data_nbits: Data width handed to both memory parameter objects.
    """
    # Config params
    s.addr_nbits = cop_addr_nbits
    s.data_nbits = cop_data_nbits
    s.mreq_p = mem_msgs.MemReqParams(mem_addr_nbits, mem_data_nbits)
    s.mresp_p = mem_msgs.MemRespParams(mem_data_nbits)

    # Shorter names
    s.mk_req = s.mreq_p.mk_req
    s.mk_resp = s.mresp_p.mk_resp
    s.rd = s.mreq_p.type_read
    # NOTE(review): the write type comes from the response params while the
    # read type comes from the request params -- confirm this asymmetry is
    # intended.
    s.wr = s.mresp_p.type_write
    s.data_slice = s.mresp_p.data_slice
    # Floor division keeps the byte count an integer under true division
    # (Python 3 or ``from __future__ import division``); the result is
    # unchanged under classic integer division.
    s.data_nbytes = s.mreq_p.data_nbits // 8

    # COP interface
    s.from_cpu = InValRdyBundle(s.addr_nbits + s.data_nbits)
    s.to_cpu = OutMatrixVecBundle()

    # Memory request/response ports
    s.memreq = InValRdyBundle(s.mreq_p.nbits)
    s.memresp = OutValRdyBundle(s.mresp_p.nbits)

    # Internal state
    s.size = 0
    s.src0_addr = 0
    s.src1_addr = 0
    s.dest_addr = 0

    # Pipeline state
    s.src0_X1 = 0
    s.accumulator_X1 = 0
    s.token_X1 = False

    # State for X0 stage
    s.STATE_CMD = 0
    s.STATE_L0 = 1
    s.STATE_L1 = 2
    s.STATE_TK = 3
    s.STATE_WAIT1 = 4
    s.STATE_SD = 5
    s.STATE_WAIT2 = 6
    s.STATE_DONE = 7
    s.state = s.STATE_CMD

    # Types of transactions that can go down the pipeline
    s.XACT_TYPE_NONE = " "
    s.XACT_TYPE_L0 = "L0"
    s.XACT_TYPE_L1 = "L1"
    s.XACT_TYPE_TK = "TK"
    s.XACT_TYPE_SD = "SD"
    s.XACT_TYPE_DONE = "D "

    # Queues: one constructed with 1 for memory requests and one
    # constructed with 4 between the X0 and X1 stages.
    s.memreq_queue = Queue(1)
    s.pipe_queue_X0X1 = Queue(4)