    def registerTopology(self, options):
        # numa_nodes and num_numa_nodes are the quadrant lists computed in
        # makeTopology() below.
        for i, n in enumerate(numa_nodes):
            if n:
                FileSystemConfig.register_node(n,
                    MemorySize(options.mem_size) // num_numa_nodes, i)
    def makeTopology(self, options, full_system, network, IntLink,
                     ExtLink, Router):
        nodes = self.nodes

        num_routers = options.num_cpus
        num_rows = options.mesh_rows

        # default values for link latency and router latency.
        # Can be over-ridden on a per link/router basis
        link_latency = options.link_latency  # used by simple and garnet
        router_latency = options.router_latency  # only used by garnet

        # First determine which nodes are cache cntrls vs. dirs vs. dma
        cache_nodes = []
        dir_nodes = []
        dma_nodes = []
        for node in nodes:
            if node.type == 'L1Cache_Controller' or \
               node.type == 'L2Cache_Controller':
                cache_nodes.append(node)
            elif node.type == 'Directory_Controller':
                dir_nodes.append(node)
            elif node.type == 'DMA_Controller':
                dma_nodes.append(node)

        # Obviously the number of rows must be <= the number of routers
        # and evenly divisible.  Also the number of caches must be a
        # multiple of the number of routers and the number of directories
        # must be four.
        assert (num_rows > 0 and num_rows <= num_routers)
        num_columns = int(num_routers / num_rows)
        assert (num_columns * num_rows == num_routers)
        caches_per_router, remainder = divmod(len(cache_nodes), num_routers)
        assert (remainder == 0)
        assert (len(dir_nodes) == 4)

        # Create the routers in the mesh
        routers = [Router(router_id=i, latency=router_latency)
                   for i in range(num_routers)]
        network.routers = routers

        # link counter to set unique link ids
        link_count = 0

        # Connect each cache controller to the appropriate router
        ext_links = []
        for (i, n) in enumerate(cache_nodes):
            cntrl_level, router_id = divmod(i, num_routers)
            assert (cntrl_level < caches_per_router)
            ext_links.append(ExtLink(link_id=link_count, ext_node=n,
                                     int_node=routers[router_id],
                                     latency=link_latency))
            link_count += 1

        # NUMA node for each quadrant
        # With odd columns or rows, the nodes will be unequal
        numa_nodes = [[], [], [], []]
        for i in range(num_routers):
            if i % num_columns < num_columns / 2 and \
               i < num_routers / 2:
                numa_nodes[0].append(i)
            elif i % num_columns >= num_columns / 2 and \
               i < num_routers / 2:
                numa_nodes[1].append(i)
            elif i % num_columns < num_columns / 2 and \
               i >= num_routers / 2:
                numa_nodes[2].append(i)
            else:
                numa_nodes[3].append(i)

        num_numa_nodes = 0
        for n in numa_nodes:
            if n:
                num_numa_nodes += 1

        # Register nodes with filesystem
        if not full_system:
            for i, n in enumerate(numa_nodes):
                if n:
                    FileSystemConfig.register_node(n,
                        MemorySize(options.mem_size) // num_numa_nodes, i)

        # Connect the dir nodes to the corners.
        ext_links.append(ExtLink(link_id=link_count, ext_node=dir_nodes[0],
                                 int_node=routers[0],
                                 latency=link_latency))
        link_count += 1
        ext_links.append(ExtLink(link_id=link_count, ext_node=dir_nodes[1],
                                 int_node=routers[num_columns - 1],
                                 latency=link_latency))
        link_count += 1
        ext_links.append(ExtLink(link_id=link_count, ext_node=dir_nodes[2],
                                 int_node=routers[num_routers - num_columns],
                                 latency=link_latency))
        link_count += 1
        ext_links.append(ExtLink(link_id=link_count, ext_node=dir_nodes[3],
                                 int_node=routers[num_routers - 1],
                                 latency=link_latency))
        link_count += 1

        # Connect the dma nodes to router 0.  These should only be DMA nodes.
        for (i, node) in enumerate(dma_nodes):
            assert (node.type == 'DMA_Controller')
            ext_links.append(ExtLink(link_id=link_count, ext_node=node,
                                     int_node=routers[0],
                                     latency=link_latency))
            link_count += 1

        network.ext_links = ext_links

        # Create the mesh links.
        int_links = []

        # East output to West input links (weight = 1)
        for row in range(num_rows):
            for col in range(num_columns):
                if (col + 1 < num_columns):
                    east_out = col + (row * num_columns)
                    west_in = (col + 1) + (row * num_columns)
                    int_links.append(IntLink(link_id=link_count,
                                             src_node=routers[east_out],
                                             dst_node=routers[west_in],
                                             src_outport="East",
                                             dst_inport="West",
                                             latency=link_latency,
                                             weight=1))
                    link_count += 1

        # West output to East input links (weight = 1)
        for row in range(num_rows):
            for col in range(num_columns):
                if (col + 1 < num_columns):
                    east_in = col + (row * num_columns)
                    west_out = (col + 1) + (row * num_columns)
                    int_links.append(IntLink(link_id=link_count,
                                             src_node=routers[west_out],
                                             dst_node=routers[east_in],
                                             src_outport="West",
                                             dst_inport="East",
                                             latency=link_latency,
                                             weight=1))
                    link_count += 1

        # North output to South input links (weight = 2)
        for col in range(num_columns):
            for row in range(num_rows):
                if (row + 1 < num_rows):
                    north_out = col + (row * num_columns)
                    south_in = col + ((row + 1) * num_columns)
                    int_links.append(IntLink(link_id=link_count,
                                             src_node=routers[north_out],
                                             dst_node=routers[south_in],
                                             src_outport="North",
                                             dst_inport="South",
                                             latency=link_latency,
                                             weight=2))
                    link_count += 1

        # South output to North input links (weight = 2)
        for col in range(num_columns):
            for row in range(num_rows):
                if (row + 1 < num_rows):
                    north_in = col + (row * num_columns)
                    south_out = col + ((row + 1) * num_columns)
                    int_links.append(IntLink(link_id=link_count,
                                             src_node=routers[south_out],
                                             dst_node=routers[north_in],
                                             src_outport="South",
                                             dst_inport="North",
                                             latency=link_latency,
                                             weight=2))
                    link_count += 1

        network.int_links = int_links
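# A minimal sketch (not part of the topology class) of the row-major router
# numbering the loops above rely on: router id = col + row * num_columns.
# For a hypothetical 2-row by 4-column mesh, the four corner routers that
# receive the directory controllers are therefore ids 0, 3, 4, and 7.
def corner_ids(num_rows, num_columns):
    num_routers = num_rows * num_columns
    return [0, num_columns - 1, num_routers - num_columns, num_routers - 1]

assert corner_ids(2, 4) == [0, 3, 4, 7]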
    def registerTopology(self, options):
        for i in range(options.num_cpus):
            FileSystemConfig.register_node([i],
                MemorySize(options.mem_size) // options.num_cpus, i)
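# A small worked example of the registration above, assuming a hypothetical
# 512MB system with four CPUs: each single-CPU node is registered with one
# quarter of the memory.  gem5's MemorySize exposes the byte count through
# its .value attribute, so integer division splits it evenly.
mem_per_node = MemorySize('512MB').value // 4
assert mem_per_node == 134217728  # 128MB per node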
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):
    if buildEnv['PROTOCOL'] != 'MESI_Three_Level':
        fatal("This script requires the MESI_Three_Level protocol to be "
              "built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l0_cntrl_nodes = []
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dma_cntrl_nodes = []

    assert (options.num_cpus % options.num_clusters == 0)
    num_cpus_per_cluster = options.num_cpus // options.num_clusters

    assert (options.num_l2caches % options.num_clusters == 0)
    num_l2caches_per_cluster = options.num_l2caches // options.num_clusters

    l2_bits = int(math.log(num_l2caches_per_cluster, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))
    l2_index_start = block_size_bits + l2_bits

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    for i in range(options.num_clusters):
        for j in range(num_cpus_per_cluster):
            #
            # First create the Ruby objects associated with this cpu
            #
            l0i_cache = L0Cache(size = options.l0i_size,
                                assoc = options.l0i_assoc,
                                is_icache = True,
                                start_index_bit = block_size_bits,
                                replacement_policy = LRURP())

            l0d_cache = L0Cache(size = options.l0d_size,
                                assoc = options.l0d_assoc,
                                is_icache = False,
                                start_index_bit = block_size_bits,
                                replacement_policy = LRURP())

            # the ruby random tester reuses num_cpus to specify the
            # number of cpu ports connected to the tester object, which
            # is stored in system.cpu. because there is only ever one
            # tester object, num_cpus is not necessarily equal to the
            # size of system.cpu; therefore if len(system.cpu) == 1
            # we use system.cpu[0] to set the clk_domain, thereby ensuring
            # we don't index off the end of the cpu list.
            if len(system.cpu) == 1:
                clk_domain = system.cpu[0].clk_domain
            else:
                clk_domain = system.cpu[i].clk_domain

            # Ruby prefetcher
            prefetcher = RubyPrefetcher(
                num_streams = 16,
                unit_filter = 256,
                nonunit_filter = 256,
                train_misses = 5,
                num_startup_pfs = 4,
                cross_page = True
            )

            l0_cntrl = L0Cache_Controller(
                version = i * num_cpus_per_cluster + j,
                Icache = l0i_cache, Dcache = l0d_cache,
                transitions_per_cycle = options.l0_transitions_per_cycle,
                prefetcher = prefetcher,
                enable_prefetch = options.enable_prefetch,
                send_evictions = send_evicts(options),
                clk_domain = clk_domain,
                ruby_system = ruby_system)

            cpu_seq = RubySequencer(version = i * num_cpus_per_cluster + j,
                                    icache = l0i_cache, dcache = l0d_cache,
                                    clk_domain = clk_domain,
                                    ruby_system = ruby_system)

            l0_cntrl.sequencer = cpu_seq

            l1_cache = L1Cache(size = options.l1d_size,
                               assoc = options.l1d_assoc,
                               start_index_bit = block_size_bits,
                               is_icache = False)

            l1_cntrl = L1Cache_Controller(
                version = i * num_cpus_per_cluster + j,
                cache = l1_cache, l2_select_num_bits = l2_bits,
                cluster_id = i,
                transitions_per_cycle = options.l1_transitions_per_cycle,
                ruby_system = ruby_system)

            exec("ruby_system.l0_cntrl%d = l0_cntrl"
                 % (i * num_cpus_per_cluster + j))
            exec("ruby_system.l1_cntrl%d = l1_cntrl"
                 % (i * num_cpus_per_cluster + j))

            #
            # Add controllers and sequencers to the appropriate lists
            #
            cpu_sequencers.append(cpu_seq)
            l0_cntrl_nodes.append(l0_cntrl)
            l1_cntrl_nodes.append(l1_cntrl)

            # Connect the L0 and L1 controllers
            l0_cntrl.prefetchQueue = MessageBuffer()
            l0_cntrl.mandatoryQueue = MessageBuffer()
            l0_cntrl.bufferToL1 = MessageBuffer(ordered = True)
            l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1
            l0_cntrl.bufferFromL1 = MessageBuffer(ordered = True)
            l1_cntrl.bufferToL0 = l0_cntrl.bufferFromL1

            # Connect the L1 controllers and the network
            l1_cntrl.requestToL2 = MessageBuffer()
            l1_cntrl.requestToL2.master = ruby_system.network.slave
            l1_cntrl.responseToL2 = MessageBuffer()
            l1_cntrl.responseToL2.master = ruby_system.network.slave
            l1_cntrl.unblockToL2 = MessageBuffer()
            l1_cntrl.unblockToL2.master = ruby_system.network.slave

            l1_cntrl.requestFromL2 = MessageBuffer()
            l1_cntrl.requestFromL2.slave = ruby_system.network.master
            l1_cntrl.responseFromL2 = MessageBuffer()
            l1_cntrl.responseFromL2.slave = ruby_system.network.master

        for j in range(num_l2caches_per_cluster):
            l2_cache = L2Cache(size = options.l2_size,
                               assoc = options.l2_assoc,
                               start_index_bit = l2_index_start)

            l2_cntrl = L2Cache_Controller(
                version = i * num_l2caches_per_cluster + j,
                L2cache = l2_cache, cluster_id = i,
                transitions_per_cycle = options.l2_transitions_per_cycle,
                ruby_system = ruby_system)

            exec("ruby_system.l2_cntrl%d = l2_cntrl"
                 % (i * num_l2caches_per_cluster + j))
            l2_cntrl_nodes.append(l2_cntrl)

            # Connect the L2 controllers and the network
            l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
            l2_cntrl.DirRequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
            l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.responseFromL2Cache = MessageBuffer()
            l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

            l2_cntrl.unblockToL2Cache = MessageBuffer()
            l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master
            l2_cntrl.L1RequestToL2Cache = MessageBuffer()
            l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
            l2_cntrl.responseToL2Cache = MessageBuffer()
            l2_cntrl.responseToL2Cache.slave = ruby_system.network.master

    # Run each of the ruby memory controllers at a ratio of the frequency
    # of the ruby system.
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain = ruby_system.clk_domain, clk_divider = 3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version = i, ruby_system = ruby_system)

        dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq,
                                   transitions_per_cycle = options.ports,
                                   ruby_system = ruby_system)

        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
        exec("ruby_system.dma_cntrl%d.dma_sequencer.slave = dma_port" % i)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave

    all_cntrls = l0_cntrl_nodes + \
                 l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version = len(dma_ports),
                              ruby_system = ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version = len(dma_ports),
                                       dma_sequencer = io_seq,
                                       ruby_system = ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered = True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]
    # Register configuration with filesystem
    else:
        for i in range(options.num_clusters):
            for j in range(num_cpus_per_cluster):
                FileSystemConfig.register_cpu(physical_package_id = 0,
                    core_siblings = range(options.num_cpus),
                    core_id = i * num_cpus_per_cluster + j,
                    thread_siblings = [])

                FileSystemConfig.register_cache(level = 0,
                    idu_type = 'Instruction',
                    size = options.l0i_size,
                    line_size = options.cacheline_size,
                    assoc = 1,
                    cpus = [i * num_cpus_per_cluster + j])
                FileSystemConfig.register_cache(level = 0,
                    idu_type = 'Data',
                    size = options.l0d_size,
                    line_size = options.cacheline_size,
                    assoc = 1,
                    cpus = [i * num_cpus_per_cluster + j])

                FileSystemConfig.register_cache(level = 1,
                    idu_type = 'Unified',
                    size = options.l1d_size,
                    line_size = options.cacheline_size,
                    assoc = options.l1d_assoc,
                    cpus = [i * num_cpus_per_cluster + j])

            FileSystemConfig.register_cache(level = 2,
                idu_type = 'Unified',
                size = str(MemorySize(options.l2_size) *
                           num_l2caches_per_cluster) + 'B',
                line_size = options.cacheline_size,
                assoc = options.l2_assoc,
                cpus = [n for n in range(i * num_cpus_per_cluster,
                                         (i + 1) * num_cpus_per_cluster)])

    ruby_system.network.number_of_virtual_networks = 3
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
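# A worked example of the index arithmetic used above, under the assumption
# of a hypothetical 64B cache line and 4 L2 banks per cluster: the low
# log2(64) = 6 bits are the block offset, the next log2(4) = 2 bits select
# the L2 bank, so each bank indexes its sets starting at bit 8.
import math
block_size_bits = int(math.log(64, 2))  # 6
l2_bits = int(math.log(4, 2))           # 2
assert block_size_bits + l2_bits == 8   # l2_index_start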
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    if buildEnv['PROTOCOL'] != 'GPU_VIPER':
        panic("This script requires the GPU_VIPER protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    cp_cntrl_nodes = []
    tcp_cntrl_nodes = []
    sqc_cntrl_nodes = []
    tcc_cntrl_nodes = []
    dir_cntrl_nodes = []
    l3_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #

    # For an odd number of CPUs, still create the right number of controllers
    TCC_bits = int(math.log(options.num_tccs, 2))

    # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
    # Clusters
    crossbar_bw = None
    mainCluster = None

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        dir_bits = int(math.log(options.num_dirs, 2))
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
        # Assuming a 2GHz clock
        crossbar_bw = 16 * options.num_compute_units * options.bw_scalor
        mainCluster = Cluster(intBW=crossbar_bw)
    else:
        mainCluster = Cluster(intBW=8)  # 16 GB/s

    for i in range(options.num_dirs):
        dir_ranges = []
        for r in system.mem_ranges:
            addr_range = m5.objects.AddrRange(r.start, size=r.size(),
                                              intlvHighBit=numa_bit,
                                              intlvBits=dir_bits,
                                              intlvMatch=i)
            dir_ranges.append(addr_range)

        dir_cntrl = DirCntrl(noTCCdir=True, TCC_select_num_bits=TCC_bits)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)
        dir_cntrl.number_of_TBEs = options.num_tbes
        dir_cntrl.useL3OnWT = options.use_L3_on_WT
        # the number_of_TBEs is inclusive of TBEs below

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered=True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

        dir_cntrl.requestFromDMA = MessageBuffer(ordered=True)
        dir_cntrl.requestFromDMA.slave = ruby_system.network.master

        dir_cntrl.responseToDMA = MessageBuffer()
        dir_cntrl.responseToDMA.master = ruby_system.network.slave

        exec("ruby_system.dir_cntrl%d = dir_cntrl" % i)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    cpuCluster = None
    if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
        cpuCluster = Cluster(extBW=crossbar_bw, intBW=crossbar_bw)
    else:
        cpuCluster = Cluster(extBW=8, intBW=8)  # 16 GB/s

    for i in range((options.num_cpus + 1) // 2):
        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        exec("ruby_system.cp_cntrl%d = cp_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered=True)

        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        for i in range((options.num_cpus + 1) // 2):
            FileSystemConfig.register_cpu(physical_package_id=0,
                                          core_siblings=
                                            range(options.num_cpus),
                                          core_id=i * 2,
                                          thread_siblings=[])

            FileSystemConfig.register_cpu(physical_package_id=0,
                                          core_siblings=
                                            range(options.num_cpus),
                                          core_id=i * 2 + 1,
                                          thread_siblings=[])

            FileSystemConfig.register_cache(level=0, idu_type='Instruction',
                                            size=options.l1i_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1i_assoc,
                                            cpus=[i * 2, i * 2 + 1])

            FileSystemConfig.register_cache(level=0, idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i * 2])

            FileSystemConfig.register_cache(level=0, idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i * 2 + 1])

            FileSystemConfig.register_cache(level=1, idu_type='Unified',
                                            size=options.l2_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l2_assoc,
                                            cpus=[i * 2, i * 2 + 1])

        for i in range(options.num_dirs):
            FileSystemConfig.register_cache(level=2, idu_type='Unified',
                                            size=options.l3_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l3_assoc,
                                            cpus=[n for n in
                                                  range(options.num_cpus)])

    gpuCluster = None
    if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
        gpuCluster = Cluster(extBW=crossbar_bw, intBW=crossbar_bw)
    else:
        gpuCluster = Cluster(extBW=8, intBW=8)  # 16 GB/s

    for i in range(options.num_compute_units):
        tcp_cntrl = TCPCntrl(TCC_select_num_bits=TCC_bits,
                             issue_latency=1,
                             number_of_TBEs=2560)
        # TBEs set to max outstanding requests
        tcp_cntrl.create(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = options.noL1
        tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
        tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency

        exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.coalescer)
        tcp_cntrl_nodes.append(tcp_cntrl)

        # Connect the TCP controller to the ruby network
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.requestFromTCP.master = ruby_system.network.slave

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseFromTCP.master = ruby_system.network.slave

        tcp_cntrl.unblockFromCore = MessageBuffer()
        tcp_cntrl.unblockFromCore.master = ruby_system.network.slave

        tcp_cntrl.probeToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.probeToTCP.slave = ruby_system.network.master

        tcp_cntrl.responseToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseToTCP.slave = ruby_system.network.master

        tcp_cntrl.mandatoryQueue = MessageBuffer()

        gpuCluster.add(tcp_cntrl)

    for i in range(options.num_sqc):
        sqc_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # Connect the SQC controller to the ruby network
        sqc_cntrl.requestFromSQC = MessageBuffer(ordered=True)
        sqc_cntrl.requestFromSQC.master = ruby_system.network.slave

        sqc_cntrl.probeToSQC = MessageBuffer(ordered=True)
        sqc_cntrl.probeToSQC.slave = ruby_system.network.master

        sqc_cntrl.responseToSQC = MessageBuffer(ordered=True)
        sqc_cntrl.responseToSQC.slave = ruby_system.network.master

        sqc_cntrl.mandatoryQueue = MessageBuffer()

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    for i in range(options.num_scalar_cache):
        scalar_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        scalar_cntrl.create(options, ruby_system, system)

        exec('ruby_system.scalar_cntrl%d = scalar_cntrl' % i)

        cpu_sequencers.append(scalar_cntrl.sequencer)

        scalar_cntrl.requestFromSQC = MessageBuffer(ordered=True)
        scalar_cntrl.requestFromSQC.master = ruby_system.network.slave

        scalar_cntrl.probeToSQC = MessageBuffer(ordered=True)
        scalar_cntrl.probeToSQC.slave = ruby_system.network.master

        scalar_cntrl.responseToSQC = MessageBuffer(ordered=True)
        scalar_cntrl.responseToSQC.slave = ruby_system.network.master

        scalar_cntrl.mandatoryQueue = \
            MessageBuffer(buffer_size=options.scalar_buffer_size)

        gpuCluster.add(scalar_cntrl)

    for i in range(options.num_cp):
        tcp_ID = options.num_compute_units + i
        sqc_ID = options.num_sqc + i

        tcp_cntrl = TCPCntrl(TCC_select_num_bits=TCC_bits,
                             issue_latency=1,
                             number_of_TBEs=2560)
        # TBEs set to max outstanding requests
        tcp_cntrl.createCP(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = options.noL1
        tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
        tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency

        exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % tcp_ID)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.sequencer)
        tcp_cntrl_nodes.append(tcp_cntrl)

        # Connect the CP (TCP) controllers to the ruby network
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.requestFromTCP.master = ruby_system.network.slave

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseFromTCP.master = ruby_system.network.slave

        tcp_cntrl.unblockFromCore = MessageBuffer(ordered=True)
        tcp_cntrl.unblockFromCore.master = ruby_system.network.slave

        tcp_cntrl.probeToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.probeToTCP.slave = ruby_system.network.master

        tcp_cntrl.responseToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseToTCP.slave = ruby_system.network.master

        tcp_cntrl.mandatoryQueue = MessageBuffer()

        gpuCluster.add(tcp_cntrl)

        sqc_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % sqc_ID)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    for i in range(options.num_tccs):
        tcc_cntrl = TCCCntrl(l2_response_latency=options.TCC_latency)
        tcc_cntrl.create(options, ruby_system, system)
        tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency
        tcc_cntrl.l2_response_latency = options.TCC_latency
        tcc_cntrl_nodes.append(tcc_cntrl)
        tcc_cntrl.WB = options.WB_L2
        tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units
        # the number_of_TBEs is inclusive of TBEs below

        # Connect the TCC controllers to the ruby network
        tcc_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcc_cntrl.requestFromTCP.slave = ruby_system.network.master

        tcc_cntrl.responseToCore = MessageBuffer(ordered=True)
        tcc_cntrl.responseToCore.master = ruby_system.network.slave

        tcc_cntrl.probeFromNB = MessageBuffer()
        tcc_cntrl.probeFromNB.slave = ruby_system.network.master

        tcc_cntrl.responseFromNB = MessageBuffer()
        tcc_cntrl.responseFromNB.slave = ruby_system.network.master

        tcc_cntrl.requestToNB = MessageBuffer(ordered=True)
        tcc_cntrl.requestToNB.master = ruby_system.network.slave

        tcc_cntrl.responseToNB = MessageBuffer()
        tcc_cntrl.responseToNB.master = ruby_system.network.slave

        tcc_cntrl.unblockToNB = MessageBuffer()
        tcc_cntrl.unblockToNB.master = ruby_system.network.slave

        tcc_cntrl.triggerQueue = MessageBuffer(ordered=True)

        exec("ruby_system.tcc_cntrl%d = tcc_cntrl" % i)

        # connect all of the wire buffers between L3 and dirs up
        # TCC cntrls added to the GPU cluster
        gpuCluster.add(tcc_cntrl)

    for i, dma_device in enumerate(dma_devices):
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system)
        dma_cntrl = DMA_Controller(version=i, dma_sequencer=dma_seq,
                                   ruby_system=ruby_system)
        exec('system.dma_cntrl%d = dma_cntrl' % i)
        if dma_device.type == 'MemTest':
            exec('system.dma_cntrl%d.dma_sequencer.slave = dma_devices.test'
                 % i)
        else:
            exec('system.dma_cntrl%d.dma_sequencer.slave = dma_device.dma'
                 % i)
        dma_cntrl.requestToDir = MessageBuffer(buffer_size=0)
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.responseFromDir = MessageBuffer(buffer_size=0)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.mandatoryQueue = MessageBuffer(buffer_size=0)
        gpuCluster.add(dma_cntrl)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

    ruby_system.network.number_of_virtual_networks = 11

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
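# A sketch of how the AddrRange interleaving above spreads physical memory
# across directories, assuming hypothetical values of four directories
# (dir_bits = 2) and 64B lines (block offset bits 0-5): bits 6-7 select
# the directory, and numa_bit = 6 + 2 - 1 = 7 is the highest of those
# bits.  A matching pure-Python check of the mapping:
def home_dir(addr, block_size_bits=6, dir_bits=2):
    return (addr >> block_size_bits) & ((1 << dir_bits) - 1)

assert home_dir(0x000) == 0
assert home_dir(0x040) == 1  # the next 64B line maps to the next directory
assert home_dir(0x100) == 0  # wraps around after four lines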
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):
    if buildEnv['PROTOCOL'] != 'MOESI_hammer':
        panic("This script requires the MOESI_hammer protocol to be built.")

    cpu_sequencers = []
    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l1_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits)
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=block_size_bits)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(version=i,
                                      L1Icache=l1i_cache,
                                      L1Dcache=l1d_cache,
                                      L2cache=l2_cache,
                                      no_mig_atomic=not
                                        options.allow_atomic_migration,
                                      send_evictions=send_evicts(options),
                                      transitions_per_cycle=options.ports,
                                      clk_domain=clk_domain,
                                      ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        if options.recycle_latency:
            l1_cntrl.recycle_latency = options.recycle_latency

        exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controller and the network
        # Connect the buffers from the controller to network
        l1_cntrl.requestFromCache = MessageBuffer()
        l1_cntrl.requestFromCache.master = ruby_system.network.slave
        l1_cntrl.responseFromCache = MessageBuffer()
        l1_cntrl.responseFromCache.master = ruby_system.network.slave
        l1_cntrl.unblockFromCache = MessageBuffer()
        l1_cntrl.unblockFromCache.master = ruby_system.network.slave

        l1_cntrl.triggerQueue = MessageBuffer()

        # Connect the buffers from the network to the controller
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.forwardToCache = MessageBuffer()
        l1_cntrl.forwardToCache.slave = ruby_system.network.master
        l1_cntrl.responseToCache = MessageBuffer()
        l1_cntrl.responseToCache.slave = ruby_system.network.master

    #
    # determine size and index bits for probe filter
    # By default, the probe filter size is configured to be twice the
    # size of the L2 cache.
    #
    pf_size = MemorySize(options.l2_size)
    pf_size.value = pf_size.value * 2
    dir_bits = int(math.log(options.num_dirs, 2))
    pf_bits = int(math.log(pf_size.value, 2))
    if options.numa_high_bit:
        if options.pf_on or options.dir_on:
            # if numa high bit explicitly set, make sure it does not overlap
            # with the probe filter index
            assert (options.numa_high_bit - dir_bits > pf_bits)

        # set the probe filter start bit to just above the block offset
        pf_start_bit = block_size_bits
    else:
        if dir_bits > 0:
            pf_start_bit = dir_bits + block_size_bits - 1
        else:
            pf_start_bit = block_size_bits

    # Run each of the ruby memory controllers at a ratio of the frequency
    # of the ruby system.
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain, clk_divider=3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        pf = ProbeFilter(size=pf_size, assoc=4,
                         start_index_bit=pf_start_bit)

        dir_cntrl.probeFilter = pf
        dir_cntrl.probe_filter_enabled = options.pf_on
        dir_cntrl.full_bit_dir_enabled = options.dir_on

        if options.recycle_latency:
            dir_cntrl.recycle_latency = options.recycle_latency

        # Connect the directory controller to the network
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)

        dir_cntrl.unblockToDir = MessageBuffer()
        dir_cntrl.unblockToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master

        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
        dma_cntrl_nodes.append(dma_cntrl)

        if options.recycle_latency:
            dma_cntrl.recycle_latency = options.recycle_latency

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.mandatoryQueue = MessageBuffer()

    all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave
        io_controller.mandatoryQueue = MessageBuffer()

        all_cntrls = all_cntrls + [io_controller]
    # Register configuration with filesystem
    else:
        FileSystemConfig.config_filesystem(options)

        for i in range(options.num_cpus):
            FileSystemConfig.register_cpu(physical_package_id=0,
                                          core_siblings=[],
                                          core_id=i,
                                          thread_siblings=[])

            FileSystemConfig.register_cache(level=1, idu_type='Instruction',
                                            size=options.l1i_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1i_assoc,
                                            cpus=[i])

            FileSystemConfig.register_cache(level=1, idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i])

            FileSystemConfig.register_cache(level=2, idu_type='Unified',
                                            size=options.l2_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l2_assoc,
                                            cpus=[i])

    ruby_system.network.number_of_virtual_networks = 6
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
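# A worked example of the probe filter sizing above, assuming a
# hypothetical 512kB L2: the filter is sized at twice the L2 (1MB, so
# pf_bits = 20), and with a single directory (dir_bits = 0) and 64B lines
# the filter indexes from just above the 6 block offset bits.
import math
pf_value = MemorySize('512kB').value * 2  # 1048576 bytes
assert int(math.log(pf_value, 2)) == 20   # pf_bits
assert int(math.log(64, 2)) == 6          # pf_start_bit when dir_bits == 0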
def create_system(options, full_system, system, piobus=None, dma_ports=[],
                  bootmem=None):

    system.ruby = RubySystem()
    ruby = system.ruby

    # Generate pseudo filesystem
    FileSystemConfig.config_filesystem(system, options)

    # Create the network object
    (network, IntLinkClass, ExtLinkClass, RouterClass, InterfaceClass) = \
        Network.create_network(options, ruby)
    ruby.network = network

    protocol = buildEnv['PROTOCOL']
    exec("from . import %s" % protocol)

    try:
        (cpu_sequencers, dir_cntrls, topology) = \
            eval("%s.create_system(options, full_system, system, "
                 "dma_ports, bootmem, ruby)" % protocol)
    except:
        print("Error: could not create system for ruby protocol %s"
              % protocol)
        raise

    # Create the network topology
    topology.makeTopology(options, network, IntLinkClass, ExtLinkClass,
                          RouterClass)

    # Register the topology elements with faux filesystem (SE mode only)
    if not full_system:
        topology.registerTopology(options)

    # Initialize network based on topology
    Network.init_network(options, network, InterfaceClass)

    # Create a port proxy for connecting the system port. This is
    # independent of the protocol and kept in the protocol-agnostic
    # part (i.e. here).
    sys_port_proxy = RubyPortProxy(ruby_system=ruby)
    if piobus is not None:
        sys_port_proxy.pio_master_port = piobus.slave

    # Give the system port proxy a SimObject parent without creating a
    # full-fledged controller
    system.sys_port_proxy = sys_port_proxy

    # Connect the system port for loading of binaries etc
    system.system_port = system.sys_port_proxy.slave

    setup_memory_controllers(system, ruby, dir_cntrls, options)

    # Connect the cpu sequencers and the piobus
    if piobus is not None:
        for cpu_seq in cpu_sequencers:
            cpu_seq.pio_master_port = piobus.slave
            cpu_seq.mem_master_port = piobus.slave

            if buildEnv['TARGET_ISA'] == "x86":
                cpu_seq.pio_slave_port = piobus.master

    ruby.number_of_virtual_networks = \
        ruby.network.number_of_virtual_networks
    ruby._cpu_ports = cpu_sequencers
    ruby.num_of_sequencers = len(cpu_sequencers)

    # Create a backing copy of physical memory in case required
    if options.access_backing_store:
        ruby.access_backing_store = True
        ruby.phys_mem = SimpleMemory(range=system.mem_ranges[0],
                                     in_addr_map=False)
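# A sketch of the dynamic protocol dispatch used above, under the
# assumption that a module named after buildEnv['PROTOCOL'] (for example
# MESI_Three_Level.py) lives in the same package and exposes a
# create_system() function.  importlib expresses the same intent as the
# exec/eval pair without string evaluation; load_protocol is a
# hypothetical helper name, not gem5 API.
import importlib

def load_protocol(protocol_name):
    mod = importlib.import_module('.' + protocol_name, package=__package__)
    return mod.create_system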
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    if buildEnv['PROTOCOL'] != 'MOESI_AMD_Base':
        panic("This script requires the MOESI_AMD_Base protocol.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l3_cntrl_nodes = []
    dir_cntrl_nodes = []

    control_count = 0

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #

    # This is the base crossbar that connects the L3s, Dirs, and cpu
    # Cluster
    mainCluster = Cluster(extBW=512, intBW=512)  # 1 TB/s

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        dir_bits = int(math.log(options.num_dirs, 2))
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    for i in range(options.num_dirs):
        dir_ranges = []
        for r in system.mem_ranges:
            addr_range = m5.objects.AddrRange(r.start, size=r.size(),
                                              intlvHighBit=numa_bit,
                                              intlvBits=dir_bits,
                                              intlvMatch=i)
            dir_ranges.append(addr_range)

        dir_cntrl = DirCntrl(TCC_select_num_bits=0)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered=True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

        exec("system.dir_cntrl%d = dir_cntrl" % i)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    # Technically this config can support an odd number of cpus, but the top
    # level config files, such as the ruby_random_tester, will get confused
    # if the number of cpus does not equal the number of sequencers.  Thus
    # make sure that an even number of cpus is specified.
    assert ((options.num_cpus % 2) == 0)

    # For an odd number of CPUs, still create the right number of
    # controllers
    cpuCluster = Cluster(extBW=512, intBW=512)  # 1 TB/s
    for i in range((options.num_cpus + 1) // 2):
        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        exec("system.cp_cntrl%d = cp_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered=True)

        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        for i in range((options.num_cpus + 1) // 2):
            FileSystemConfig.register_cpu(physical_package_id=0,
                                          core_siblings=
                                            range(options.num_cpus),
                                          core_id=i * 2,
                                          thread_siblings=[])

            FileSystemConfig.register_cpu(physical_package_id=0,
                                          core_siblings=
                                            range(options.num_cpus),
                                          core_id=i * 2 + 1,
                                          thread_siblings=[])

            FileSystemConfig.register_cache(level=0, idu_type='Instruction',
                                            size=options.l1i_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1i_assoc,
                                            cpus=[i * 2, i * 2 + 1])

            FileSystemConfig.register_cache(level=0, idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i * 2])

            FileSystemConfig.register_cache(level=0, idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i * 2 + 1])

            FileSystemConfig.register_cache(level=1, idu_type='Unified',
                                            size=options.l2_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l2_assoc,
                                            cpus=[i * 2, i * 2 + 1])

        for i in range(options.num_dirs):
            FileSystemConfig.register_cache(level=2, idu_type='Unified',
                                            size=options.l3_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l3_assoc,
                                            cpus=[n for n in
                                                  range(options.num_cpus)])

    # Assuming no DMA devices
    assert (len(dma_devices) == 0)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)

    ruby_system.network.number_of_virtual_networks = 10

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
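# A small check of the core-pair math above: the assert forces an even CPU
# count, and (num_cpus + 1) // 2 then yields exactly num_cpus / 2 CorePair
# controllers, each owning two consecutive core ids.
for num_cpus in (2, 4, 8):
    assert num_cpus % 2 == 0
    assert (num_cpus + 1) // 2 == num_cpus // 2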
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system, cpus):
    if buildEnv['PROTOCOL'] != 'GPU_VIPER':
        panic("This script requires the GPU_VIPER protocol to be built.")

    cpu_sequencers = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #

    # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
    # Clusters
    crossbar_bw = None
    mainCluster = None
    cpuCluster = None
    gpuCluster = None

    if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
        # Assuming a 2GHz clock
        crossbar_bw = 16 * options.num_compute_units * options.bw_scalor
        mainCluster = Cluster(intBW = crossbar_bw)
        cpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
        gpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
    else:
        mainCluster = Cluster(intBW = 8)            # 16 GB/s
        cpuCluster = Cluster(extBW = 8, intBW = 8)  # 16 GB/s
        gpuCluster = Cluster(extBW = 8, intBW = 8)  # 16 GB/s

    # Create CPU directory controllers
    dir_cntrl_nodes = \
        construct_dirs(options, system, ruby_system, ruby_system.network)
    for dir_cntrl in dir_cntrl_nodes:
        mainCluster.add(dir_cntrl)

    # Create CPU core pairs
    (cp_sequencers, cp_cntrl_nodes) = \
        construct_corepairs(options, system, ruby_system,
                            ruby_system.network)
    cpu_sequencers.extend(cp_sequencers)
    for cp_cntrl in cp_cntrl_nodes:
        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        for i in range((options.num_cpus + 1) // 2):
            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings =
                                            range(options.num_cpus),
                                          core_id = i * 2,
                                          thread_siblings = [])

            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings =
                                            range(options.num_cpus),
                                          core_id = i * 2 + 1,
                                          thread_siblings = [])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Instruction',
                                            size = options.l1i_size,
                                            line_size =
                                              options.cacheline_size,
                                            assoc = options.l1i_assoc,
                                            cpus = [i * 2, i * 2 + 1])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size =
                                              options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i * 2])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size =
                                              options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i * 2 + 1])

            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Unified',
                                            size = options.l2_size,
                                            line_size =
                                              options.cacheline_size,
                                            assoc = options.l2_assoc,
                                            cpus = [i * 2, i * 2 + 1])

        for i in range(options.num_dirs):
            FileSystemConfig.register_cache(level = 2,
                                            idu_type = 'Unified',
                                            size = options.l3_size,
                                            line_size =
                                              options.cacheline_size,
                                            assoc = options.l3_assoc,
                                            cpus = [n for n in
                                                range(options.num_cpus)])

    # Create TCPs
    (tcp_sequencers, tcp_cntrl_nodes) = \
        construct_tcps(options, system, ruby_system, ruby_system.network)
    cpu_sequencers.extend(tcp_sequencers)
    for tcp_cntrl in tcp_cntrl_nodes:
        gpuCluster.add(tcp_cntrl)

    # Create SQCs
    (sqc_sequencers, sqc_cntrl_nodes) = \
        construct_sqcs(options, system, ruby_system, ruby_system.network)
    cpu_sequencers.extend(sqc_sequencers)
    for sqc_cntrl in sqc_cntrl_nodes:
        gpuCluster.add(sqc_cntrl)

    # Create Scalars
    (scalar_sequencers, scalar_cntrl_nodes) = \
        construct_scalars(options, system, ruby_system,
                          ruby_system.network)
    cpu_sequencers.extend(scalar_sequencers)
    for scalar_cntrl in scalar_cntrl_nodes:
        gpuCluster.add(scalar_cntrl)

    # Create command processors
    (cmdproc_sequencers, cmdproc_cntrl_nodes) = \
        construct_cmdprocs(options, system, ruby_system,
                           ruby_system.network)
    cpu_sequencers.extend(cmdproc_sequencers)
    for cmdproc_cntrl in cmdproc_cntrl_nodes:
        gpuCluster.add(cmdproc_cntrl)

    # Create TCCs
    tcc_cntrl_nodes = \
        construct_tccs(options, system, ruby_system, ruby_system.network)
    for tcc_cntrl in tcc_cntrl_nodes:
        gpuCluster.add(tcc_cntrl)

    for i, dma_device in enumerate(dma_devices):
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system)
        dma_cntrl = DMA_Controller(version=i, dma_sequencer=dma_seq,
                                   ruby_system=ruby_system)
        exec('system.dma_cntrl%d = dma_cntrl' % i)

        # IDE doesn't have a .type but seems like everything else does.
        if not hasattr(dma_device, 'type'):
            exec('system.dma_cntrl%d.dma_sequencer.in_ports = dma_device'
                 % i)
        elif dma_device.type == 'MemTest':
            exec('system.dma_cntrl%d.dma_sequencer.in_ports = '
                 'dma_devices.test' % i)
        else:
            exec('system.dma_cntrl%d.dma_sequencer.in_ports = '
                 'dma_device.dma' % i)

        dma_cntrl.requestToDir = MessageBuffer(buffer_size=0)
        dma_cntrl.requestToDir.out_port = ruby_system.network.in_port
        dma_cntrl.responseFromDir = MessageBuffer(buffer_size=0)
        dma_cntrl.responseFromDir.in_port = ruby_system.network.out_port
        dma_cntrl.mandatoryQueue = MessageBuffer(buffer_size = 0)
        gpuCluster.add(dma_cntrl)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

    ruby_system.network.number_of_virtual_networks = 11

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
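# A worked example of the crossbar bandwidth scaling above, assuming a
# hypothetical 64-CU GPU with bw_scalor = 4 at the 2GHz clock the comment
# assumes: the Cluster intBW/extBW arguments are in bytes per cycle, so
# the default of 8 corresponds to 16 GB/s and the scaled value to ~8 TB/s.
crossbar_bw = 16 * 64 * 4                 # 4096 bytes/cycle
bytes_per_sec = crossbar_bw * int(2e9)    # ~8.2e12, i.e. ~8 TB/s
assert 8 * int(2e9) == 16 * 10**9         # intBW = 8 -> 16 GB/s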
    if options.checker:
        system.cpu[i].addCheckerCpu()

    system.cpu[i].createThreads()

# Redirect filesystem syscalls from src to the first matching dests
chroot = os.path.expanduser(options.chroot)
redirect_paths = [RedirectPath(src="/proc",
                      dests=["%s/fs/proc" % m5.options.outdir]),
                  RedirectPath(src="/sys",
                      dests=["%s/fs/sys" % m5.options.outdir]),
                  RedirectPath(src="/tmp",
                      dests=["%s/fs/tmp" % m5.options.outdir]),
                  RedirectPath(src="/",
                      dests=["%s" % chroot])]

system.redirect_paths = redirect_paths

FileSystemConfig.config_filesystem(options)

if options.ruby:
    Ruby.create_system(options, False, system)
    assert (options.num_cpus == len(system.ruby._cpu_ports))

    system.ruby.clk_domain = SrcClockDomain(
        clock=options.ruby_clock,
        voltage_domain=system.voltage_domain)
    for i in range(np):
        ruby_port = system.ruby._cpu_ports[i]

        # Create the interrupt controller and connect its ports to Ruby
        # Note that the interrupt controller is always present but only
        # in x86 does it have message ports that need to be connected
        system.cpu[i].createInterruptController()
    def makeTopology(self, options, full_system, network, IntLink,
                     ExtLink, Router):
        nodes = self.nodes

        num_routers = options.num_cpus
        num_rows = options.mesh_rows

        # default values for link latency and router latency.
        # Can be over-ridden on a per link/router basis
        link_latency = options.link_latency  # used by simple and garnet
        router_latency = options.router_latency  # only used by garnet

        # There must be an evenly divisible number of cntrls to routers.
        # Also, obviously the number of rows must be <= the number of
        # routers.
        cntrls_per_router, remainder = divmod(len(nodes), num_routers)
        assert (num_rows > 0 and num_rows <= num_routers)
        num_columns = int(num_routers / num_rows)
        assert (num_columns * num_rows == num_routers)

        # Create the routers in the mesh
        routers = [Router(router_id=i, latency=router_latency)
                   for i in range(num_routers)]
        network.routers = routers

        # link counter to set unique link ids
        link_count = 0

        # Add all but the remainder nodes to the list of nodes to be
        # uniformly distributed across the network.
        network_nodes = []
        remainder_nodes = []
        for node_index in range(len(nodes)):
            if node_index < (len(nodes) - remainder):
                network_nodes.append(nodes[node_index])
            else:
                remainder_nodes.append(nodes[node_index])

        # Connect each node to the appropriate router
        ext_links = []
        for (i, n) in enumerate(network_nodes):
            cntrl_level, router_id = divmod(i, num_routers)
            assert (cntrl_level < cntrls_per_router)
            ext_links.append(ExtLink(link_id=link_count, ext_node=n,
                                     int_node=routers[router_id],
                                     latency=link_latency))
            link_count += 1

        # Connect the remaining nodes to router 0.  These should only be
        # DMA nodes.
        for (i, node) in enumerate(remainder_nodes):
            assert (node.type == 'DMA_Controller')
            assert (i < remainder)
            ext_links.append(ExtLink(link_id=link_count, ext_node=node,
                                     int_node=routers[0],
                                     latency=link_latency))
            link_count += 1

        network.ext_links = ext_links

        # Create the mesh links.
        int_links = []

        # East output to West input links (weight = 1)
        for row in range(num_rows):
            for col in range(num_columns):
                if (col + 1 < num_columns):
                    east_out = col + (row * num_columns)
                    west_in = (col + 1) + (row * num_columns)
                    int_links.append(IntLink(link_id=link_count,
                                             src_node=routers[east_out],
                                             dst_node=routers[west_in],
                                             src_outport="East",
                                             dst_inport="West",
                                             latency=link_latency,
                                             weight=1))
                    link_count += 1

        # West output to East input links (weight = 1)
        for row in range(num_rows):
            for col in range(num_columns):
                if (col + 1 < num_columns):
                    east_in = col + (row * num_columns)
                    west_out = (col + 1) + (row * num_columns)
                    int_links.append(IntLink(link_id=link_count,
                                             src_node=routers[west_out],
                                             dst_node=routers[east_in],
                                             src_outport="West",
                                             dst_inport="East",
                                             latency=link_latency,
                                             weight=1))
                    link_count += 1

        # North output to South input links (weight = 2)
        for col in range(num_columns):
            for row in range(num_rows):
                if (row + 1 < num_rows):
                    north_out = col + (row * num_columns)
                    south_in = col + ((row + 1) * num_columns)
                    int_links.append(IntLink(link_id=link_count,
                                             src_node=routers[north_out],
                                             dst_node=routers[south_in],
                                             src_outport="North",
                                             dst_inport="South",
                                             latency=link_latency,
                                             weight=2))
                    link_count += 1

        # South output to North input links (weight = 2)
        for col in range(num_columns):
            for row in range(num_rows):
                if (row + 1 < num_rows):
                    north_in = col + (row * num_columns)
                    south_out = col + ((row + 1) * num_columns)
                    int_links.append(IntLink(link_id=link_count,
                                             src_node=routers[south_out],
                                             dst_node=routers[north_in],
                                             src_outport="South",
                                             dst_inport="North",
                                             latency=link_latency,
                                             weight=2))
                    link_count += 1

        network.int_links = int_links

        # Register nodes with filesystem
        if not full_system:
            for i in range(options.num_cpus):
                FileSystemConfig.register_node([i],
                    MemorySize(options.mem_size) // options.num_cpus, i)
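# A minimal sketch of why the link weights above differ: the table-based
# routing in the simple and garnet networks prefers lower-weight links, so
# weight-1 East/West links and weight-2 North/South links yield
# deterministic X-then-Y (XY) routing.  A pure-Python model of the
# resulting path between two router ids in a num_columns-wide mesh:
def xy_route(src, dst, num_columns):
    hops = []
    cur = src
    while cur % num_columns != dst % num_columns:  # X dimension first
        cur += 1 if dst % num_columns > cur % num_columns else -1
        hops.append(cur)
    while cur != dst:                              # then Y dimension
        cur += num_columns if dst > cur else -num_columns
        hops.append(cur)
    return hops

assert xy_route(0, 5, num_columns=4) == [1, 5]  # east once, then south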
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    if buildEnv['PROTOCOL'] != 'MOESI_AMD_Base':
        panic("This script requires the MOESI_AMD_Base protocol.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l3_cntrl_nodes = []
    dir_cntrl_nodes = []

    control_count = 0

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #

    # This is the base crossbar that connects the L3s, Dirs, and cpu
    # Cluster
    mainCluster = Cluster(extBW = 512, intBW = 512)  # 1 TB/s

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        dir_bits = int(math.log(options.num_dirs, 2))
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    for i in range(options.num_dirs):
        dir_ranges = []
        for r in system.mem_ranges:
            addr_range = m5.objects.AddrRange(r.start, size = r.size(),
                                              intlvHighBit = numa_bit,
                                              intlvBits = dir_bits,
                                              intlvMatch = i)
            dir_ranges.append(addr_range)

        dir_cntrl = DirCntrl(TCC_select_num_bits = 0)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered = True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.responseFromMemory = MessageBuffer()

        exec("system.dir_cntrl%d = dir_cntrl" % i)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    # Technically this config can support an odd number of cpus, but the top
    # level config files, such as the ruby_random_tester, will get confused
    # if the number of cpus does not equal the number of sequencers.  Thus
    # make sure that an even number of cpus is specified.
    assert ((options.num_cpus % 2) == 0)

    # For an odd number of CPUs, still create the right number of
    # controllers
    cpuCluster = Cluster(extBW = 512, intBW = 512)  # 1 TB/s
    for i in range((options.num_cpus + 1) // 2):
        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        exec("system.cp_cntrl%d = cp_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered = True)

        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        FileSystemConfig.config_filesystem(options)
        for i in range((options.num_cpus + 1) // 2):
            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings =
                                            range(options.num_cpus),
                                          core_id = i * 2,
                                          thread_siblings = [])

            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings =
                                            range(options.num_cpus),
                                          core_id = i * 2 + 1,
                                          thread_siblings = [])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Instruction',
                                            size = options.l1i_size,
                                            line_size =
                                              options.cacheline_size,
                                            assoc = options.l1i_assoc,
                                            cpus = [i * 2, i * 2 + 1])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size =
                                              options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i * 2])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size =
                                              options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i * 2 + 1])

            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Unified',
                                            size = options.l2_size,
                                            line_size =
                                              options.cacheline_size,
                                            assoc = options.l2_assoc,
                                            cpus = [i * 2, i * 2 + 1])

        for i in range(options.num_dirs):
            FileSystemConfig.register_cache(level = 2,
                                            idu_type = 'Unified',
                                            size = options.l3_size,
                                            line_size =
                                              options.cacheline_size,
                                            assoc = options.l3_assoc,
                                            cpus = [n for n in
                                                range(options.num_cpus)])

    # Assuming no DMA devices
    assert (len(dma_devices) == 0)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)

    ruby_system.network.number_of_virtual_networks = 10

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
print("Switch at instruction count: %d" % cpu_list[0].max_insts_any_thread) exit_event = m5.simulate(maxtick) if options.fast_forward: if exit_event.getCause() == "a thread reached the max instruction count": m5.switchCpus(system, switch_cpu_list) print("Switched CPUS @ tick %s" % (m5.curTick())) m5.stats.reset() exit_event = m5.simulate(maxtick - m5.curTick()) elif options.fast_forward_pseudo_op: while exit_event.getCause() == "switchcpu": # If we are switching *to* kvm, then the current stats are meaningful # Note that we don't do any warmup by default if type(switch_cpu_list[0][0]) == FutureCpuClass: print("Dumping stats...") m5.stats.dump() m5.switchCpus(system, switch_cpu_list) print("Switched CPUS @ tick %s" % (m5.curTick())) m5.stats.reset() # This lets us switch back and forth without keeping a counter switch_cpu_list = [(x[1], x[0]) for x in switch_cpu_list] exit_event = m5.simulate(maxtick - m5.curTick()) print("Ticks:", m5.curTick()) print('Exiting because ', exit_event.getCause()) FileSystemConfig.cleanup_filesystem(options) sys.exit(exit_event.getCode())
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):

    if buildEnv['PROTOCOL'] != 'MESI_Three_Level':
        fatal("This script requires the MESI_Three_Level protocol to be "
              "built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list. Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l0_cntrl_nodes = []
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dma_cntrl_nodes = []

    assert (options.num_cpus % options.num_clusters == 0)
    num_cpus_per_cluster = options.num_cpus // options.num_clusters

    assert (options.num_l2caches % options.num_clusters == 0)
    num_l2caches_per_cluster = options.num_l2caches // options.num_clusters

    l2_bits = int(math.log(num_l2caches_per_cluster, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))
    l2_index_start = block_size_bits + l2_bits

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    for i in range(options.num_clusters):
        for j in range(num_cpus_per_cluster):
            #
            # First create the Ruby objects associated with this cpu
            #
            l0i_cache = L0Cache(size = '4096B', assoc = 1,
                                is_icache = True,
                                start_index_bit = block_size_bits,
                                replacement_policy = LRUReplacementPolicy())

            l0d_cache = L0Cache(size = '4096B', assoc = 1,
                                is_icache = False,
                                start_index_bit = block_size_bits,
                                replacement_policy = LRUReplacementPolicy())

            # the ruby random tester reuses num_cpus to specify the
            # number of cpu ports connected to the tester object, which
            # is stored in system.cpu. because there is only ever one
            # tester object, num_cpus is not necessarily equal to the
            # size of system.cpu; therefore if len(system.cpu) == 1
            # we use system.cpu[0] to set the clk_domain, thereby ensuring
            # we don't index off the end of the cpu list.
            if len(system.cpu) == 1:
                clk_domain = system.cpu[0].clk_domain
            else:
                clk_domain = system.cpu[i].clk_domain

            l0_cntrl = L0Cache_Controller(
                version = i * num_cpus_per_cluster + j,
                Icache = l0i_cache, Dcache = l0d_cache,
                send_evictions = send_evicts(options),
                clk_domain = clk_domain,
                ruby_system = ruby_system)

            cpu_seq = RubySequencer(version = i * num_cpus_per_cluster + j,
                                    icache = l0i_cache,
                                    clk_domain = clk_domain,
                                    dcache = l0d_cache,
                                    ruby_system = ruby_system)

            l0_cntrl.sequencer = cpu_seq

            l1_cache = L1Cache(size = options.l1d_size,
                               assoc = options.l1d_assoc,
                               start_index_bit = block_size_bits,
                               is_icache = False)

            l1_cntrl = L1Cache_Controller(
                version = i * num_cpus_per_cluster + j,
                cache = l1_cache, l2_select_num_bits = l2_bits,
                cluster_id = i, ruby_system = ruby_system)

            exec("ruby_system.l0_cntrl%d = l0_cntrl"
                 % (i * num_cpus_per_cluster + j))
            exec("ruby_system.l1_cntrl%d = l1_cntrl"
                 % (i * num_cpus_per_cluster + j))

            #
            # Add controllers and sequencers to the appropriate lists
            #
            cpu_sequencers.append(cpu_seq)
            l0_cntrl_nodes.append(l0_cntrl)
            l1_cntrl_nodes.append(l1_cntrl)

            # Connect the L0 and L1 controllers
            l0_cntrl.mandatoryQueue = MessageBuffer()
            l0_cntrl.bufferToL1 = MessageBuffer(ordered = True)
            l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1
            l0_cntrl.bufferFromL1 = MessageBuffer(ordered = True)
            l1_cntrl.bufferToL0 = l0_cntrl.bufferFromL1

            # Connect the L1 controllers and the network
            l1_cntrl.requestToL2 = MessageBuffer()
            l1_cntrl.requestToL2.master = ruby_system.network.slave
            l1_cntrl.responseToL2 = MessageBuffer()
            l1_cntrl.responseToL2.master = ruby_system.network.slave
            l1_cntrl.unblockToL2 = MessageBuffer()
            l1_cntrl.unblockToL2.master = ruby_system.network.slave

            l1_cntrl.requestFromL2 = MessageBuffer()
            l1_cntrl.requestFromL2.slave = ruby_system.network.master
            l1_cntrl.responseFromL2 = MessageBuffer()
            l1_cntrl.responseFromL2.slave = ruby_system.network.master

        for j in range(num_l2caches_per_cluster):
            l2_cache = L2Cache(size = options.l2_size,
                               assoc = options.l2_assoc,
                               start_index_bit = l2_index_start)

            l2_cntrl = L2Cache_Controller(
                version = i * num_l2caches_per_cluster + j,
                L2cache = l2_cache, cluster_id = i,
                transitions_per_cycle = options.ports,
                ruby_system = ruby_system)

            exec("ruby_system.l2_cntrl%d = l2_cntrl"
                 % (i * num_l2caches_per_cluster + j))
            l2_cntrl_nodes.append(l2_cntrl)

            # Connect the L2 controllers and the network
            l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
            l2_cntrl.DirRequestFromL2Cache.master = \
                ruby_system.network.slave
            l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
            l2_cntrl.L1RequestFromL2Cache.master = \
                ruby_system.network.slave
            l2_cntrl.responseFromL2Cache = MessageBuffer()
            l2_cntrl.responseFromL2Cache.master = \
                ruby_system.network.slave

            l2_cntrl.unblockToL2Cache = MessageBuffer()
            l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master
            l2_cntrl.L1RequestToL2Cache = MessageBuffer()
            l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
            l2_cntrl.responseToL2Cache = MessageBuffer()
            l2_cntrl.responseToL2Cache.slave = ruby_system.network.master

    # Run each of the ruby memory controllers at a ratio of the frequency
    # of the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain = ruby_system.clk_domain, clk_divider = 3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version = i, ruby_system = ruby_system)

        dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq,
                                   transitions_per_cycle = options.ports,
                                   ruby_system = ruby_system)

        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
        exec("ruby_system.dma_cntrl%d.dma_sequencer.slave = dma_port" % i)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave

    all_cntrls = l0_cntrl_nodes + \
                 l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version = len(dma_ports),
                              ruby_system = ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version = len(dma_ports),
                                       dma_sequencer = io_seq,
                                       ruby_system = ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered = True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]
    # Register configuration with filesystem
    else:
        FileSystemConfig.config_filesystem(options)
        for i in range(options.num_clusters):
            for j in range(num_cpus_per_cluster):
                FileSystemConfig.register_cpu(physical_package_id = 0,
                                              core_siblings =
                                                  range(options.num_cpus),
                                              core_id =
                                                  i*num_cpus_per_cluster+j,
                                              thread_siblings = [])

                FileSystemConfig.register_cache(level = 0,
                        idu_type = 'Instruction',
                        size = '4096B',
                        line_size = options.cacheline_size,
                        assoc = 1,
                        cpus = [i*num_cpus_per_cluster+j])
                FileSystemConfig.register_cache(level = 0,
                        idu_type = 'Data',
                        size = '4096B',
                        line_size = options.cacheline_size,
                        assoc = 1,
                        cpus = [i*num_cpus_per_cluster+j])

                FileSystemConfig.register_cache(level = 1,
                        idu_type = 'Unified',
                        size = options.l1d_size,
                        line_size = options.cacheline_size,
                        assoc = options.l1d_assoc,
                        cpus = [i*num_cpus_per_cluster+j])

            FileSystemConfig.register_cache(level = 2,
                    idu_type = 'Unified',
                    size = str(MemorySize(options.l2_size) * \
                               num_l2caches_per_cluster) + 'B',
                    line_size = options.cacheline_size,
                    assoc = options.l2_assoc,
                    cpus = [n for n in range(i*num_cpus_per_cluster,
                                             (i+1)*num_cpus_per_cluster)])

    ruby_system.network.number_of_virtual_networks = 3
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
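
# Standalone sketch (all names local to this example) of two conventions
# used above: the version id i * num_cpus_per_cluster + j flattens
# (cluster, cpu) pairs into unique, dense ids, and, under the usual Ruby
# convention, l2_select_num_bits picks the L2 bank from the bits just
# above the block offset (i.e. up to l2_index_start).
import math

num_clusters, num_cpus_per_cluster = 2, 4
versions = [i * num_cpus_per_cluster + j
            for i in range(num_clusters)
            for j in range(num_cpus_per_cluster)]
assert versions == list(range(num_clusters * num_cpus_per_cluster))

cacheline_size, num_l2caches_per_cluster = 64, 4
block_size_bits = int(math.log(cacheline_size, 2))
l2_bits = int(math.log(num_l2caches_per_cluster, 2))
# Address 0x1C0 is line 7; its 2-bit bank field above the 6 offset bits is 3.
bank = (0x1C0 >> block_size_bits) & (num_l2caches_per_cluster - 1)
assert (block_size_bits + l2_bits, bank) == (8, 3)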
def create_system(options, full_system, system, piobus = None,
                  dma_ports = [], bootmem = None):

    system.ruby = RubySystem()
    ruby = system.ruby

    # Generate pseudo filesystem
    FileSystemConfig.config_filesystem(system, options)

    # Create the network object
    (network, IntLinkClass, ExtLinkClass, RouterClass, InterfaceClass) = \
        Network.create_network(options, ruby)
    ruby.network = network

    protocol = buildEnv['PROTOCOL']
    exec("from . import %s" % protocol)
    try:
        (cpu_sequencers, dir_cntrls, topology) = \
             eval("%s.create_system(options, full_system, system, dma_ports,\
                   bootmem, ruby)" % protocol)
    except:
        print("Error: could not create system for ruby protocol %s"
              % protocol)
        raise

    # Create the network topology
    topology.makeTopology(options, network, IntLinkClass, ExtLinkClass,
                          RouterClass)

    # Register the topology elements with faux filesystem (SE mode only)
    if not full_system:
        topology.registerTopology(options)

    # Initialize network based on topology
    Network.init_network(options, network, InterfaceClass)

    # Create a port proxy for connecting the system port. This is
    # independent of the protocol and kept in the protocol-agnostic
    # part (i.e. here).
    sys_port_proxy = RubyPortProxy(ruby_system = ruby)
    if piobus is not None:
        sys_port_proxy.pio_master_port = piobus.slave

    # Give the system port proxy a SimObject parent without creating a
    # full-fledged controller
    system.sys_port_proxy = sys_port_proxy

    # Connect the system port for loading of binaries etc
    system.system_port = system.sys_port_proxy.slave

    setup_memory_controllers(system, ruby, dir_cntrls, options)

    # Connect the cpu sequencers and the piobus
    if piobus is not None:
        for cpu_seq in cpu_sequencers:
            cpu_seq.pio_master_port = piobus.slave
            cpu_seq.mem_master_port = piobus.slave

            if buildEnv['TARGET_ISA'] == "x86":
                cpu_seq.pio_slave_port = piobus.master

    ruby.number_of_virtual_networks = \
        ruby.network.number_of_virtual_networks
    ruby._cpu_ports = cpu_sequencers
    ruby.num_of_sequencers = len(cpu_sequencers)

    # Create a backing copy of physical memory in case required
    if options.access_backing_store:
        ruby.access_backing_store = True
        ruby.phys_mem = SimpleMemory(range = system.mem_ranges[0],
                                     in_addr_map = False)
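
# Standalone sketch of the dynamic-dispatch pattern used above: the
# protocol name selects a module, and eval() calls that module's
# create_system(). importlib, shown here, is an equivalent illustrative
# alternative to the exec/eval pair; the module name is hypothetical.
import importlib

def load_protocol(protocol_name):
    # e.g. protocol_name = 'MOESI_hammer' resolves to a module that must
    # expose create_system() with the signature used above
    module = importlib.import_module(protocol_name)
    return module.create_system

# usage (assuming a module of that name is importable):
#     create = load_protocol(buildEnv['PROTOCOL'])
#     cpu_sequencers, dir_cntrls, topology = create(options, full_system,
#                                                   system, dma_ports,
#                                                   bootmem, ruby)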
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):

    if buildEnv['PROTOCOL'] != 'MOESI_hammer':
        panic("This script requires the MOESI_hammer protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list. Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size = options.l1i_size,
                            assoc = options.l1i_assoc,
                            start_index_bit = block_size_bits,
                            is_icache = True)
        l1d_cache = L1Cache(size = options.l1d_size,
                            assoc = options.l1d_assoc,
                            start_index_bit = block_size_bits)
        l2_cache = L2Cache(size = options.l2_size,
                           assoc = options.l2_assoc,
                           start_index_bit = block_size_bits)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(version = i,
                                      L1Icache = l1i_cache,
                                      L1Dcache = l1d_cache,
                                      L2cache = l2_cache,
                                      no_mig_atomic = not \
                                        options.allow_atomic_migration,
                                      send_evictions =
                                        send_evicts(options),
                                      transitions_per_cycle =
                                        options.ports,
                                      clk_domain = clk_domain,
                                      ruby_system = ruby_system)

        cpu_seq = RubySequencer(version = i, icache = l1i_cache,
                                dcache = l1d_cache,
                                clk_domain = clk_domain,
                                ruby_system = ruby_system)

        l1_cntrl.sequencer = cpu_seq
        if options.recycle_latency:
            l1_cntrl.recycle_latency = options.recycle_latency

        exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controller and the network
        # Connect the buffers from the controller to network
        l1_cntrl.requestFromCache = MessageBuffer()
        l1_cntrl.requestFromCache.master = ruby_system.network.slave
        l1_cntrl.responseFromCache = MessageBuffer()
        l1_cntrl.responseFromCache.master = ruby_system.network.slave
        l1_cntrl.unblockFromCache = MessageBuffer()
        l1_cntrl.unblockFromCache.master = ruby_system.network.slave

        l1_cntrl.triggerQueue = MessageBuffer()

        # Connect the buffers from the network to the controller
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.forwardToCache = MessageBuffer()
        l1_cntrl.forwardToCache.slave = ruby_system.network.master
        l1_cntrl.responseToCache = MessageBuffer()
        l1_cntrl.responseToCache.slave = ruby_system.network.master

    #
    # determine size and index bits for probe filter
    # By default, the probe filter size is configured to be twice the
    # size of the L2 cache.
    #
    pf_size = MemorySize(options.l2_size)
    pf_size.value = pf_size.value * 2
    dir_bits = int(math.log(options.num_dirs, 2))
    pf_bits = int(math.log(pf_size.value, 2))
    if options.numa_high_bit:
        if options.pf_on or options.dir_on:
            # if numa high bit explicitly set, make sure it does not
            # overlap with the probe filter index
            assert(options.numa_high_bit - dir_bits > pf_bits)

        # set the probe filter start bit to just above the block offset
        pf_start_bit = block_size_bits
    else:
        if dir_bits > 0:
            pf_start_bit = dir_bits + block_size_bits - 1
        else:
            pf_start_bit = block_size_bits

    # Run each of the ruby memory controllers at a ratio of the frequency
    # of the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain = ruby_system.clk_domain, clk_divider = 3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        pf = ProbeFilter(size = pf_size, assoc = 4,
                         start_index_bit = pf_start_bit)

        dir_cntrl.probeFilter = pf
        dir_cntrl.probe_filter_enabled = options.pf_on
        dir_cntrl.full_bit_dir_enabled = options.dir_on

        if options.recycle_latency:
            dir_cntrl.recycle_latency = options.recycle_latency

        # Connect the directory controller to the network
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered = True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered = True)

        dir_cntrl.unblockToDir = MessageBuffer()
        dir_cntrl.unblockToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered = True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version = i, ruby_system = ruby_system,
                               slave = dma_port)

        dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq,
                                   transitions_per_cycle = options.ports,
                                   ruby_system = ruby_system)

        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
        dma_cntrl_nodes.append(dma_cntrl)

        if options.recycle_latency:
            dma_cntrl.recycle_latency = options.recycle_latency

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.mandatoryQueue = MessageBuffer()

    all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version = len(dma_ports),
                              ruby_system = ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version = len(dma_ports),
                                       dma_sequencer = io_seq,
                                       ruby_system = ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.responseFromDir = MessageBuffer(ordered = True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave
        io_controller.mandatoryQueue = MessageBuffer()

        all_cntrls = all_cntrls + [io_controller]
    # Register configuration with filesystem
    else:
        for i in range(options.num_cpus):
            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = [],
                                          core_id = i,
                                          thread_siblings = [])

            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Instruction',
                                            size = options.l1i_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l1i_assoc,
                                            cpus = [i])
            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i])
            FileSystemConfig.register_cache(level = 2,
                                            idu_type = 'Unified',
                                            size = options.l2_size,
                                            line_size =
                                                options.cacheline_size,
                                            assoc = options.l2_assoc,
                                            cpus = [i])

    ruby_system.network.number_of_virtual_networks = 6
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
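
# Standalone sketch (names local to this example) of the probe filter
# sizing rule above: capacity is provisioned at twice the L2 size, and
# when no numa_high_bit is given the filter index starts just above the
# directory-select bits.
import math

def pf_params(l2_bytes, num_dirs, cacheline_size):
    pf_bytes = l2_bytes * 2    # twice the L2, as configured above
    dir_bits = int(math.log(num_dirs, 2))
    block_size_bits = int(math.log(cacheline_size, 2))
    if dir_bits > 0:
        pf_start_bit = dir_bits + block_size_bits - 1
    else:
        pf_start_bit = block_size_bits
    return pf_bytes, pf_start_bit

# 2 MiB L2, 4 directories, 64B lines -> 4 MiB filter indexed from bit 7
assert pf_params(2 * 1024 * 1024, 4, 64) == (4 * 1024 * 1024, 7)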