def create(self, options, ruby_system, system):
    """Populate this controller with its caches and its two sequencers.

    One L1 instruction cache is shared by both sequencers; each sequencer
    gets its own L1 data cache (``L1D0cache`` for core 0, ``L1D1cache``
    for core 1).  A single L2 cache is created as well.

    Args:
        options: parsed options; the cache sizes/associativities,
            ``cpu_to_dir_latency`` and ``recycle_latency`` are read here,
            and the object is also handed to each cache's ``create`` and
            to ``send_evicts``.
        ruby_system: stored on the controller and on both sequencers.
        system: not used by this method.
    """
    self.version = self.versionCount()

    # Instruction cache shared by both cores.
    self.L1Icache = L1Cache()
    self.L1Icache.create(options.l1i_size, options.l1i_assoc, options)

    # One data cache per core, configured identically.
    for dcache_attr in ("L1D0cache", "L1D1cache"):
        setattr(self, dcache_attr, L1Cache())
        getattr(self, dcache_attr).create(
            options.l1d_size, options.l1d_assoc, options)

    self.L2cache = L2Cache()
    self.L2cache.create(options.l2_size, options.l2_assoc, options)

    # Core 0 uses "sequencer"/"L1D0cache", core 1 "sequencer1"/"L1D1cache".
    # Each sequencer is attached to the controller first and configured
    # afterwards, and seqCount() is drawn in core order.
    core_layout = (("sequencer", "L1D0cache"), ("sequencer1", "L1D1cache"))
    for core_id, (seq_attr, dcache_attr) in enumerate(core_layout):
        setattr(self, seq_attr, RubySequencer())
        seq = getattr(self, seq_attr)
        seq.version = self.seqCount()
        seq.icache = self.L1Icache
        seq.dcache = getattr(self, dcache_attr)
        seq.ruby_system = ruby_system
        seq.coreid = core_id
        seq.is_cpu_sequencer = True

    self.issue_latency = options.cpu_to_dir_latency
    self.send_evictions = send_evicts(options)

    self.ruby_system = ruby_system

    # Only override recycle_latency when the option carries a truthy value.
    recycle = options.recycle_latency
    if recycle:
        self.recycle_latency = recycle
def create(self, options, ruby_system, system):
    """Populate this controller with its caches and its two sequencers.

    One L1 instruction cache is shared by both sequencers; each sequencer
    gets its own L1 data cache (``L1D0cache`` for core 0, ``L1D1cache``
    for core 1).  A single L2 cache is created as well.  Every cache
    configures itself from ``options`` through its own ``create``.

    Args:
        options: parsed options; ``cpu_to_dir_latency`` and
            ``recycle_latency`` are read here, and the object is also
            handed to each cache's ``create`` and to ``send_evicts``.
        ruby_system: stored on the controller and on both sequencers.
        system: not used by this method.
    """
    self.version = self.versionCount()

    # Instruction cache shared by both cores.
    self.L1Icache = L1ICache()
    self.L1Icache.create(options)

    # One data cache per core, configured identically.
    for dcache_attr in ("L1D0cache", "L1D1cache"):
        setattr(self, dcache_attr, L1DCache())
        getattr(self, dcache_attr).create(options)

    self.L2cache = L2Cache()
    self.L2cache.create(options)

    # Core 0 uses "sequencer"/"L1D0cache", core 1 "sequencer1"/"L1D1cache".
    # Each sequencer is attached to the controller first and configured
    # afterwards, and seqCount() is drawn in core order.
    core_layout = (("sequencer", "L1D0cache"), ("sequencer1", "L1D1cache"))
    for core_id, (seq_attr, dcache_attr) in enumerate(core_layout):
        setattr(self, seq_attr, RubySequencer())
        seq = getattr(self, seq_attr)
        seq.version = self.seqCount()
        seq.icache = self.L1Icache
        seq.dcache = getattr(self, dcache_attr)
        seq.ruby_system = ruby_system
        seq.coreid = core_id
        seq.is_cpu_sequencer = True

    # Defines icache/dcache hit latency
    self.mandatory_queue_latency = 2

    self.issue_latency = options.cpu_to_dir_latency
    self.send_evictions = send_evicts(options)

    self.ruby_system = ruby_system

    # Only override recycle_latency when the option carries a truthy value.
    recycle = options.recycle_latency
    if recycle:
        self.recycle_latency = recycle
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):
    """Build the Ruby controllers for the MOESI_CMP_directory protocol.

    Creates one L1 controller (with a sequencer) per CPU, one L2
    controller per L2 bank, the directory controllers returned by
    ``create_directories``, one DMA controller per entry of
    ``dma_ports`` and, when ``full_system`` is true, an extra IO DMA
    controller.  Every controller is attached to ``ruby_system`` and its
    message buffers are wired to ``ruby_system.network``.

    Args:
        options: parsed options (``num_cpus``, ``num_l2caches``,
            ``cacheline_size``, L1/L2 size and associativity, ``ports``).
        full_system: when true, an IO DMA controller is also created.
        system: supplies ``cpu`` (for clock domains) and ``mem_ranges``.
        dma_ports: iterable of ports, one DMA controller is made for each.
        bootmem: forwarded to ``create_directories``; when truthy its
            ``range`` is also interleaved across the L2 banks.
        ruby_system: the Ruby system that receives the controllers.

    Returns:
        Tuple ``(cpu_sequencers, mem_dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MOESI_CMP_directory':
        panic("This script requires the MOESI_CMP_directory protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=False)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(version=i,
                                      L1Icache=l1i_cache,
                                      L1Dcache=l1d_cache,
                                      send_evictions=send_evicts(options),
                                      transitions_per_cycle=options.ports,
                                      clk_domain=clk_domain,
                                      ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        # Attach under a per-index attribute name (l1_cntrl0, l1_cntrl1, ...).
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.requestFromL1Cache = MessageBuffer()
        l1_cntrl.requestFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.responseFromL1Cache = MessageBuffer()
        l1_cntrl.responseFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.requestToL1Cache = MessageBuffer()
        l1_cntrl.requestToL1Cache.slave = ruby_system.network.master
        l1_cntrl.responseToL1Cache = MessageBuffer()
        l1_cntrl.responseToL1Cache.slave = ruby_system.network.master
        l1_cntrl.triggerQueue = MessageBuffer(ordered=True)

    # Create the L2s interleaved addr ranges: every system range (plus the
    # boot memory range, if any) is split across the L2 banks using the
    # l2_bits address bits directly above the block offset.
    l2_bits = int(math.log(options.num_l2caches, 2))
    numa_bit = block_size_bits + l2_bits - 1
    sysranges = list(system.mem_ranges)
    if bootmem:
        sysranges.append(bootmem.range)
    l2_addr_ranges = [
        [AddrRange(r.start, size=r.size(),
                   intlvHighBit=numa_bit,
                   intlvBits=l2_bits,
                   intlvMatch=i)
         for r in sysranges]
        for i in range(options.num_l2caches)
    ]

    for i in range(options.num_l2caches):
        #
        # First create the Ruby objects associated with this L2 bank
        #
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=block_size_bits + l2_bits)

        l2_cntrl = L2Cache_Controller(version=i,
                                      L2cache=l2_cache,
                                      transitions_per_cycle=options.ports,
                                      ruby_system=ruby_system,
                                      addr_ranges=l2_addr_ranges[i])

        setattr(ruby_system, "l2_cntrl%d" % i, l2_cntrl)
        l2_cntrl_nodes.append(l2_cntrl)

        # Connect the L2 controllers and the network
        l2_cntrl.GlobalRequestFromL2Cache = MessageBuffer()
        l2_cntrl.GlobalRequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
        l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.responseFromL2Cache = MessageBuffer()
        l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

        l2_cntrl.GlobalRequestToL2Cache = MessageBuffer()
        l2_cntrl.GlobalRequestToL2Cache.slave = ruby_system.network.master
        l2_cntrl.L1RequestToL2Cache = MessageBuffer()
        l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
        l2_cntrl.responseToL2Cache = MessageBuffer()
        l2_cntrl.responseToL2Cache.slave = ruby_system.network.master
        l2_cntrl.triggerQueue = MessageBuffer(ordered=True)

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system.
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    # Copy so that the ROM directory is wired up and placed in the topology
    # without being added to the list returned to the caller.
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer()
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.reqToDir = MessageBuffer()
        dma_cntrl.reqToDir.master = ruby_system.network.slave
        dma_cntrl.respToDir = MessageBuffer()
        dma_cntrl.respToDir.master = ruby_system.network.slave
        dma_cntrl.triggerQueue = MessageBuffer(ordered=True)

    all_cntrls = (l1_cntrl_nodes +
                  l2_cntrl_nodes +
                  dir_cntrl_nodes +
                  dma_cntrl_nodes)

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer()
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.reqToDir = MessageBuffer()
        io_controller.reqToDir.master = ruby_system.network.slave
        io_controller.respToDir = MessageBuffer()
        io_controller.respToDir.master = ruby_system.network.slave
        io_controller.triggerQueue = MessageBuffer(ordered=True)

        all_cntrls = all_cntrls + [io_controller]

    ruby_system.network.number_of_virtual_networks = 3
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):
    """Build the Ruby controllers for the MI_example protocol.

    Creates one L1 controller (with a sequencer and a single unified
    cache) per CPU, the directory controllers returned by
    ``create_directories`` and one DMA controller per entry of
    ``dma_ports``.  When ``full_system`` is true an extra IO DMA
    controller is created; otherwise the CPU and cache layout is
    registered with ``FileSystemConfig``.

    Args:
        options: parsed options (``num_cpus``, ``num_dirs``,
            ``cacheline_size``, ``l1d_size``, ``l1d_assoc``, ``ports``).
        full_system: selects between the IO controller and the
            ``FileSystemConfig`` registration.
        system: supplies ``cpu`` (for clock domains) and ``mem_ranges``.
        dma_ports: iterable of ports, one DMA controller is made for each.
        bootmem: forwarded to ``create_directories``.
        ruby_system: the Ruby system that receives the controllers.

    Returns:
        Tuple ``(cpu_sequencers, mem_dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MI_example':
        panic("This script requires the MI_example protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l1_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        # Only one cache exists for this protocol, so by default use the L1D
        # config parameters.
        #
        cache = L1Cache(size=options.l1d_size,
                        assoc=options.l1d_assoc,
                        start_index_bit=block_size_bits)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        # Only one unified L1 cache exists. Can cache instructions and data.
        l1_cntrl = L1Cache_Controller(version=i,
                                      cacheMemory=cache,
                                      send_evictions=send_evicts(options),
                                      transitions_per_cycle=options.ports,
                                      clk_domain=clk_domain,
                                      ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i,
                                icache=cache,
                                dcache=cache,
                                clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        # Attach under a per-index attribute name (l1_cntrl0, l1_cntrl1, ...).
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.requestFromCache = MessageBuffer(ordered=True)
        l1_cntrl.requestFromCache.master = ruby_system.network.slave
        l1_cntrl.responseFromCache = MessageBuffer(ordered=True)
        l1_cntrl.responseFromCache.master = ruby_system.network.slave
        l1_cntrl.forwardToCache = MessageBuffer(ordered=True)
        l1_cntrl.forwardToCache.slave = ruby_system.network.master
        l1_cntrl.responseToCache = MessageBuffer(ordered=True)
        l1_cntrl.responseToCache.slave = ruby_system.network.master

    # Physical memory must divide evenly across the directories.
    phys_mem_size = sum(r.size() for r in system.mem_ranges)
    assert (phys_mem_size % options.num_dirs == 0)

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system.
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, system.mem_ranges, bootmem, ruby_system, system)
    # Copy so that the ROM directory is wired up and placed in the topology
    # without being added to the list returned to the caller.
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer(ordered=True)
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master

        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        # Attach the controller first, then bind its sequencer to the port
        # (same order as before).
        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl.dma_sequencer.slave = dma_port
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master

    all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes

    if full_system:
        # Create the io controller and the sequencer
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master

        all_cntrls = all_cntrls + [io_controller]
    else:
        # Register configuration with filesystem
        FileSystemConfig.config_filesystem(options)

        for i in range(options.num_cpus):
            FileSystemConfig.register_cpu(
                physical_package_id=0,
                core_siblings=range(options.num_cpus),
                core_id=i,
                thread_siblings=[])

            FileSystemConfig.register_cache(
                level=1,
                idu_type='Data',
                size=options.l1d_size,
                line_size=options.cacheline_size,
                assoc=options.l1d_assoc,
                cpus=[i])

    ruby_system.network.number_of_virtual_networks = 5
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Build the Ruby controllers for the MOESI_CMP_token protocol.

    Creates one L1 controller (with a sequencer) per CPU, one L2
    controller per L2 bank, one directory controller per directory and
    one DMA controller per entry of ``dma_ports``.  When ``full_system``
    is true an extra IO DMA controller is created.  Every controller is
    attached to ``ruby_system`` and its message buffers are wired to
    ``ruby_system.network``.

    Args:
        options: parsed options (``num_cpus``, ``num_l2caches``,
            ``num_dirs``, ``cacheline_size``, L1/L2 size and
            associativity, ``ports``, ``l1_retries``,
            ``timeout_latency``, ``disable_dyn_timeouts``,
            ``allow_atomic_migration``).
        full_system: when true, an IO DMA controller is also created.
        system: supplies ``cpu`` (for clock domains) and ``mem_ranges``.
        dma_ports: iterable of ports, one DMA controller is made for each.
        ruby_system: the Ruby system that receives the controllers.

    Returns:
        Tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MOESI_CMP_token':
        panic("This script requires the MOESI_CMP_token protocol to be built.")

    #
    # number of tokens that the owner passes to requests so that shared blocks
    # can respond to read requests
    #
    n_tokens = options.num_cpus + 1

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    l2_bits = int(math.log(options.num_l2caches, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(
            version=i,
            L1Icache=l1i_cache,
            L1Dcache=l1d_cache,
            l2_select_num_bits=l2_bits,
            N_tokens=n_tokens,
            retry_threshold=options.l1_retries,
            fixed_timeout_latency=options.timeout_latency,
            dynamic_timeout_enabled=not options.disable_dyn_timeouts,
            no_mig_atomic=not options.allow_atomic_migration,
            send_evictions=send_evicts(options),
            transitions_per_cycle=options.ports,
            clk_domain=clk_domain,
            ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        # Attach under a per-index attribute name (l1_cntrl0, l1_cntrl1, ...).
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network
        l1_cntrl.requestFromL1Cache = MessageBuffer()
        l1_cntrl.requestFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.responseFromL1Cache = MessageBuffer()
        l1_cntrl.responseFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.persistentFromL1Cache = MessageBuffer(ordered=True)
        l1_cntrl.persistentFromL1Cache.master = ruby_system.network.slave

        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.requestToL1Cache = MessageBuffer()
        l1_cntrl.requestToL1Cache.slave = ruby_system.network.master
        l1_cntrl.responseToL1Cache = MessageBuffer()
        l1_cntrl.responseToL1Cache.slave = ruby_system.network.master
        l1_cntrl.persistentToL1Cache = MessageBuffer(ordered=True)
        l1_cntrl.persistentToL1Cache.slave = ruby_system.network.master

    l2_index_start = block_size_bits + l2_bits

    for i in range(options.num_l2caches):
        #
        # First create the Ruby objects associated with this L2 bank
        #
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=l2_index_start)

        l2_cntrl = L2Cache_Controller(version=i,
                                      L2cache=l2_cache,
                                      N_tokens=n_tokens,
                                      transitions_per_cycle=options.ports,
                                      ruby_system=ruby_system)

        setattr(ruby_system, "l2_cntrl%d" % i, l2_cntrl)
        l2_cntrl_nodes.append(l2_cntrl)

        # Connect the L2 controllers and the network
        l2_cntrl.GlobalRequestFromL2Cache = MessageBuffer()
        l2_cntrl.GlobalRequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
        l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.responseFromL2Cache = MessageBuffer()
        l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

        l2_cntrl.GlobalRequestToL2Cache = MessageBuffer()
        l2_cntrl.GlobalRequestToL2Cache.slave = ruby_system.network.master
        l2_cntrl.L1RequestToL2Cache = MessageBuffer()
        l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
        l2_cntrl.responseToL2Cache = MessageBuffer()
        l2_cntrl.responseToL2Cache.slave = ruby_system.network.master
        l2_cntrl.persistentToL2Cache = MessageBuffer(ordered=True)
        l2_cntrl.persistentToL2Cache.slave = ruby_system.network.master

    # Physical memory is split evenly across the directories.  Floor
    # division keeps the per-directory size an integer on Python 3 too;
    # the assert guarantees the division is exact.
    phys_mem_size = sum(r.size() for r in system.mem_ranges)
    assert (phys_mem_size % options.num_dirs == 0)
    mem_module_size = phys_mem_size // options.num_dirs

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    for i in range(options.num_dirs):
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size

        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(version=i, size=dir_size),
            l2_select_num_bits=l2_bits,
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.persistentToDir = MessageBuffer(ordered=True)
        dir_cntrl.persistentToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master

        dir_cntrl.requestFromDir = MessageBuffer()
        dir_cntrl.requestFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.persistentFromDir = MessageBuffer(ordered=True)
        dir_cntrl.persistentFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.reqToDirectory = MessageBuffer()
        dma_cntrl.reqToDirectory.master = ruby_system.network.slave

    all_cntrls = (l1_cntrl_nodes +
                  l2_cntrl_nodes +
                  dir_cntrl_nodes +
                  dma_cntrl_nodes)

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.reqToDirectory = MessageBuffer()
        io_controller.reqToDirectory.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]

    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):
    """Build the Ruby controllers for the MESI_Three_Level protocol.

    CPUs and L2 banks are divided evenly into ``options.num_clusters``
    clusters.  For every CPU an L0 controller (split I/D caches plus the
    sequencer) and a private L1 controller are created and connected to
    each other; L1 and L2 controllers are wired to
    ``ruby_system.network``.  Directory controllers come from
    ``create_directories``; one DMA controller is made per entry of
    ``dma_ports`` and, when ``full_system`` is true, an extra IO DMA
    controller is added.

    Args:
        options: parsed options (``num_cpus``, ``num_clusters``,
            ``num_l2caches``, ``cacheline_size``, ``l1d_size``,
            ``l1d_assoc``, ``l2_size``, ``l2_assoc``, ``ports``).
        full_system: when true, an IO DMA controller is also created.
        system: supplies ``cpu`` (for clock domains).
        dma_ports: iterable of ports, one DMA controller is made for each.
        bootmem: forwarded to ``create_directories``.
        ruby_system: the Ruby system that receives the controllers.

    Returns:
        Tuple ``(cpu_sequencers, mem_dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MESI_Three_Level':
        fatal("This script requires the MESI_Three_Level protocol to be "
              "built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l0_cntrl_nodes = []
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dma_cntrl_nodes = []

    # Floor division keeps the per-cluster counts integral (they feed
    # range() below); the asserts guarantee the divisions are exact.
    assert (options.num_cpus % options.num_clusters == 0)
    num_cpus_per_cluster = options.num_cpus // options.num_clusters

    assert (options.num_l2caches % options.num_clusters == 0)
    num_l2caches_per_cluster = options.num_l2caches // options.num_clusters

    l2_bits = int(math.log(num_l2caches_per_cluster, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))
    l2_index_start = block_size_bits + l2_bits

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    for i in range(options.num_clusters):
        for j in range(num_cpus_per_cluster):
            # Global index of this CPU across all clusters; also used as
            # the controller/sequencer version.
            cpu_id = i * num_cpus_per_cluster + j

            #
            # First create the Ruby objects associated with this cpu
            #
            l0i_cache = L0Cache(size='4096B', assoc=1, is_icache=True,
                                start_index_bit=block_size_bits,
                                replacement_policy=LRUReplacementPolicy())

            l0d_cache = L0Cache(size='4096B', assoc=1, is_icache=False,
                                start_index_bit=block_size_bits,
                                replacement_policy=LRUReplacementPolicy())

            # the ruby random tester reuses num_cpus to specify the
            # number of cpu ports connected to the tester object, which
            # is stored in system.cpu. because there is only ever one
            # tester object, num_cpus is not necessarily equal to the
            # size of system.cpu; therefore if len(system.cpu) == 1
            # we use system.cpu[0] to set the clk_domain, thereby ensuring
            # we don't index off the end of the cpu list.
            if len(system.cpu) == 1:
                clk_domain = system.cpu[0].clk_domain
            else:
                # Index by the global CPU id, not the cluster index, so the
                # clock domain matches the CPU this sequencer is numbered
                # for.
                clk_domain = system.cpu[cpu_id].clk_domain

            l0_cntrl = L0Cache_Controller(
                version=cpu_id,
                Icache=l0i_cache,
                Dcache=l0d_cache,
                send_evictions=send_evicts(options),
                clk_domain=clk_domain,
                ruby_system=ruby_system)

            cpu_seq = RubySequencer(version=cpu_id,
                                    icache=l0i_cache,
                                    clk_domain=clk_domain,
                                    dcache=l0d_cache,
                                    ruby_system=ruby_system)

            l0_cntrl.sequencer = cpu_seq

            l1_cache = L1Cache(size=options.l1d_size,
                               assoc=options.l1d_assoc,
                               start_index_bit=block_size_bits,
                               is_icache=False)

            l1_cntrl = L1Cache_Controller(
                version=cpu_id,
                cache=l1_cache,
                l2_select_num_bits=l2_bits,
                cluster_id=i,
                ruby_system=ruby_system)

            # Attach under per-index attribute names (l0_cntrlN, l1_cntrlN).
            setattr(ruby_system, "l0_cntrl%d" % cpu_id, l0_cntrl)
            setattr(ruby_system, "l1_cntrl%d" % cpu_id, l1_cntrl)

            #
            # Add controllers and sequencers to the appropriate lists
            #
            cpu_sequencers.append(cpu_seq)
            l0_cntrl_nodes.append(l0_cntrl)
            l1_cntrl_nodes.append(l1_cntrl)

            # Connect the L0 and L1 controllers
            l0_cntrl.mandatoryQueue = MessageBuffer()
            l0_cntrl.bufferToL1 = MessageBuffer(ordered=True)
            l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1
            l0_cntrl.bufferFromL1 = MessageBuffer(ordered=True)
            l1_cntrl.bufferToL0 = l0_cntrl.bufferFromL1

            # Connect the L1 controllers and the network
            l1_cntrl.requestToL2 = MessageBuffer()
            l1_cntrl.requestToL2.master = ruby_system.network.slave
            l1_cntrl.responseToL2 = MessageBuffer()
            l1_cntrl.responseToL2.master = ruby_system.network.slave
            l1_cntrl.unblockToL2 = MessageBuffer()
            l1_cntrl.unblockToL2.master = ruby_system.network.slave

            l1_cntrl.requestFromL2 = MessageBuffer()
            l1_cntrl.requestFromL2.slave = ruby_system.network.master
            l1_cntrl.responseFromL2 = MessageBuffer()
            l1_cntrl.responseFromL2.slave = ruby_system.network.master

        for j in range(num_l2caches_per_cluster):
            # Global index of this L2 bank across all clusters.
            l2_id = i * num_l2caches_per_cluster + j

            l2_cache = L2Cache(size=options.l2_size,
                               assoc=options.l2_assoc,
                               start_index_bit=l2_index_start)

            l2_cntrl = L2Cache_Controller(
                version=l2_id,
                L2cache=l2_cache,
                cluster_id=i,
                transitions_per_cycle=options.ports,
                ruby_system=ruby_system)

            setattr(ruby_system, "l2_cntrl%d" % l2_id, l2_cntrl)
            l2_cntrl_nodes.append(l2_cntrl)

            # Connect the L2 controllers and the network
            l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
            l2_cntrl.DirRequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
            l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.responseFromL2Cache = MessageBuffer()
            l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

            l2_cntrl.unblockToL2Cache = MessageBuffer()
            l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master
            l2_cntrl.L1RequestToL2Cache = MessageBuffer()
            l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
            l2_cntrl.responseToL2Cache = MessageBuffer()
            l2_cntrl.responseToL2Cache.slave = ruby_system.network.master

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    # Copy so that the ROM directory is wired up and placed in the topology
    # without being added to the list returned to the caller.
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        # Attach the controller first, then bind its sequencer to the port
        # (same order as before).
        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl.dma_sequencer.slave = dma_port
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave

    all_cntrls = (l0_cntrl_nodes +
                  l1_cntrl_nodes +
                  l2_cntrl_nodes +
                  dir_cntrl_nodes +
                  dma_cntrl_nodes)

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]

    ruby_system.network.number_of_virtual_networks = 3
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Instantiate the Ruby controllers for the MI_example protocol.

    Creates one L1 cache controller and sequencer per CPU, one directory
    controller per directory and one DMA controller per entry of
    ``dma_ports``.  Every controller is attached to ``ruby_system`` under a
    numbered attribute name (``l1_cntrl<N>``, ``dir_cntrl<N>``,
    ``dma_cntrl<N>``) and its message buffers are connected to
    ``ruby_system.network``.

    Args:
        options: parsed options; this function reads ``cacheline_size``,
            ``num_cpus``, ``l1d_size``, ``l1d_assoc``, ``ports`` and
            ``num_dirs`` from it and also passes it to ``send_evicts`` and
            ``create_topology``.
        full_system: when true, an additional DMA controller is created
            and stored as ``ruby_system.io_controller``.
        system: object whose ``cpu`` list and ``mem_ranges`` are read.
        dma_ports: iterable of ports; each is bound to the ``slave`` port of
            a new DMA sequencer.
        ruby_system: object that receives the controllers and provides
            ``network`` and ``clk_domain``.

    Returns:
        The tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MI_example':
        panic("This script requires the MI_example protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in xrange(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        # Only one cache exists for this protocol, so by default use the L1D
        # config parameters.
        #
        cache = L1Cache(size=options.l1d_size,
                        assoc=options.l1d_assoc,
                        start_index_bit=block_size_bits)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        # Only one unified L1 cache exists. Can cache instructions and data.
        l1_cntrl = L1Cache_Controller(version=i,
                                      cacheMemory=cache,
                                      send_evictions=send_evicts(options),
                                      transitions_per_cycle=options.ports,
                                      clk_domain=clk_domain,
                                      ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i,
                                icache=cache,
                                dcache=cache,
                                clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        # Plain attribute assignment under a computed name; no need for exec.
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.requestFromCache = MessageBuffer(ordered=True)
        l1_cntrl.requestFromCache.master = ruby_system.network.slave
        l1_cntrl.responseFromCache = MessageBuffer(ordered=True)
        l1_cntrl.responseFromCache.master = ruby_system.network.slave
        l1_cntrl.forwardToCache = MessageBuffer(ordered=True)
        l1_cntrl.forwardToCache.slave = ruby_system.network.master
        l1_cntrl.responseToCache = MessageBuffer(ordered=True)
        l1_cntrl.responseToCache.slave = ruby_system.network.master

    phys_mem_size = sum(r.size() for r in system.mem_ranges)
    assert phys_mem_size % options.num_dirs == 0
    # The assert above guarantees exact divisibility, so floor division
    # yields the same integer on Python 2 and keeps it an int elsewhere.
    mem_module_size = phys_mem_size // options.num_dirs

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system.
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    for i in xrange(options.num_dirs):
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size
        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(version=i, size=dir_size),
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer(ordered=True)
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master

        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        dma_cntrl_name = "dma_cntrl%d" % i
        setattr(ruby_system, dma_cntrl_name, dma_cntrl)
        # Bind the port through the attribute just stored on ruby_system,
        # following the same lookup path the original exec string used.
        getattr(ruby_system, dma_cntrl_name).dma_sequencer.slave = dma_port
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller and the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master

    all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master

        all_cntrls = all_cntrls + [io_controller]

    ruby_system.network.number_of_virtual_networks = 5
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):
    """Instantiate the Ruby controllers for the MOESI_CMP_token protocol.

    Creates one L1 controller and sequencer per CPU, one L2 controller per
    L2 cache, the directory controllers returned by ``create_directories``
    and one DMA controller per entry of ``dma_ports``.  L1, L2 and DMA
    controllers are attached to ``ruby_system`` under numbered attribute
    names and all message buffers are connected to ``ruby_system.network``.

    Args:
        options: parsed options; read here for cache geometry, controller
            counts, timeout/retry settings and ``ports``.
        full_system: when true, an additional DMA controller is created
            and stored as ``ruby_system.io_controller``.
        system: object whose ``cpu`` list is read; also forwarded to
            ``create_directories``.
        dma_ports: iterable of ports, each passed as ``slave`` to a new
            DMA sequencer.
        bootmem: forwarded unchanged to ``create_directories``.
        ruby_system: object that receives the controllers and provides
            ``network`` and ``clk_domain``.

    Returns:
        The tuple ``(cpu_sequencers, mem_dir_cntrl_nodes, topology)``.  The
        second element is the first list returned by
        ``create_directories``; it does not include the optional ROM
        directory node appended locally.
    """
    if buildEnv['PROTOCOL'] != 'MOESI_CMP_token':
        panic("This script requires the MOESI_CMP_token protocol to be "
              "built.")

    #
    # number of tokens that the owner passes to requests so that shared
    # blocks can respond to read requests
    #
    n_tokens = options.num_cpus + 1

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    l2_bits = int(math.log(options.num_l2caches, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in xrange(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(
            version=i,
            L1Icache=l1i_cache,
            L1Dcache=l1d_cache,
            l2_select_num_bits=l2_bits,
            N_tokens=n_tokens,
            retry_threshold=options.l1_retries,
            fixed_timeout_latency=options.timeout_latency,
            dynamic_timeout_enabled=not options.disable_dyn_timeouts,
            no_mig_atomic=not options.allow_atomic_migration,
            send_evictions=send_evicts(options),
            transitions_per_cycle=options.ports,
            clk_domain=clk_domain,
            ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        # Plain attribute assignment under a computed name; no need for exec.
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network
        l1_cntrl.requestFromL1Cache = MessageBuffer()
        l1_cntrl.requestFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.responseFromL1Cache = MessageBuffer()
        l1_cntrl.responseFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.persistentFromL1Cache = MessageBuffer(ordered=True)
        l1_cntrl.persistentFromL1Cache.master = ruby_system.network.slave

        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.requestToL1Cache = MessageBuffer()
        l1_cntrl.requestToL1Cache.slave = ruby_system.network.master
        l1_cntrl.responseToL1Cache = MessageBuffer()
        l1_cntrl.responseToL1Cache.slave = ruby_system.network.master
        l1_cntrl.persistentToL1Cache = MessageBuffer(ordered=True)
        l1_cntrl.persistentToL1Cache.slave = ruby_system.network.master

    l2_index_start = block_size_bits + l2_bits

    for i in xrange(options.num_l2caches):
        #
        # First create the Ruby objects associated with this L2 cache
        #
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=l2_index_start)

        l2_cntrl = L2Cache_Controller(version=i,
                                      L2cache=l2_cache,
                                      N_tokens=n_tokens,
                                      transitions_per_cycle=options.ports,
                                      ruby_system=ruby_system)

        setattr(ruby_system, "l2_cntrl%d" % i, l2_cntrl)
        l2_cntrl_nodes.append(l2_cntrl)

        # Connect the L2 controllers and the network
        l2_cntrl.GlobalRequestFromL2Cache = MessageBuffer()
        l2_cntrl.GlobalRequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
        l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.responseFromL2Cache = MessageBuffer()
        l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

        l2_cntrl.GlobalRequestToL2Cache = MessageBuffer()
        l2_cntrl.GlobalRequestToL2Cache.slave = ruby_system.network.master
        l2_cntrl.L1RequestToL2Cache = MessageBuffer()
        l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
        l2_cntrl.responseToL2Cache = MessageBuffer()
        l2_cntrl.responseToL2Cache.slave = ruby_system.network.master
        l2_cntrl.persistentToL2Cache = MessageBuffer(ordered=True)
        l2_cntrl.persistentToL2Cache.slave = ruby_system.network.master

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    # Work on a copy so the list handed back to the caller stays free of
    # the ROM directory node.
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)

    for dir_cntrl in dir_cntrl_nodes:
        dir_cntrl.l2_select_num_bits = l2_bits

        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.persistentToDir = MessageBuffer(ordered=True)
        dir_cntrl.persistentToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master

        dir_cntrl.requestFromDir = MessageBuffer()
        dir_cntrl.requestFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.persistentFromDir = MessageBuffer(ordered=True)
        dir_cntrl.persistentFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.reqToDirectory = MessageBuffer()
        dma_cntrl.reqToDirectory.master = ruby_system.network.slave

    all_cntrls = (l1_cntrl_nodes + l2_cntrl_nodes + dir_cntrl_nodes +
                  dma_cntrl_nodes)

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.reqToDirectory = MessageBuffer()
        io_controller.reqToDirectory.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]

    ruby_system.network.number_of_virtual_networks = 6
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Instantiate the Ruby controllers for the MESI_Three_Level protocol.

    For every cluster this creates an L0 controller, a sequencer and an L1
    controller per CPU plus the cluster's L2 controllers; afterwards it
    creates one directory controller per directory and one DMA controller
    per entry of ``dma_ports``.  Controllers are attached to
    ``ruby_system`` under numbered attribute names and their message
    buffers are connected to ``ruby_system.network`` (L0 and L1 are
    connected to each other directly through shared buffers).

    Args:
        options: parsed options; read here for CPU/cluster/L2/directory
            counts, cache geometry and ``ports``.
        full_system: when true, an additional DMA controller is created
            and stored as ``ruby_system.io_controller``.
        system: object whose ``cpu`` list and ``mem_ranges`` are read.
        dma_ports: iterable of ports; each is bound to the ``slave`` port of
            a new DMA sequencer.
        ruby_system: object that receives the controllers and provides
            ``network`` and ``clk_domain``.

    Returns:
        The tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``.
    """
    if buildEnv["PROTOCOL"] != "MESI_Three_Level":
        fatal("This script requires the MESI_Three_Level protocol to be "
              "built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l0_cntrl_nodes = []
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    # The asserts guarantee exact divisibility, so floor division returns
    # the same integers as before while always staying an int (the results
    # feed xrange() and math.log()).
    assert options.num_cpus % options.num_clusters == 0
    num_cpus_per_cluster = options.num_cpus // options.num_clusters

    assert options.num_l2caches % options.num_clusters == 0
    num_l2caches_per_cluster = options.num_l2caches // options.num_clusters

    l2_bits = int(math.log(num_l2caches_per_cluster, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))
    l2_index_start = block_size_bits + l2_bits

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    for i in xrange(options.num_clusters):
        for j in xrange(num_cpus_per_cluster):
            # Global index of this cpu across all clusters.
            cpu_version = i * num_cpus_per_cluster + j

            #
            # First create the Ruby objects associated with this cpu
            #
            l0i_cache = L0Cache(
                size="4096B",
                assoc=1,
                is_icache=True,
                start_index_bit=block_size_bits,
                replacement_policy=LRUReplacementPolicy(),
            )

            l0d_cache = L0Cache(
                size="4096B",
                assoc=1,
                is_icache=False,
                start_index_bit=block_size_bits,
                replacement_policy=LRUReplacementPolicy(),
            )

            # NOTE(review): the clock domain is looked up with the cluster
            # index i, not the global cpu index -- confirm this is intended
            # before changing it (system.cpu may hold fewer entries than
            # num_cpus).
            l0_cntrl = L0Cache_Controller(
                version=cpu_version,
                Icache=l0i_cache,
                Dcache=l0d_cache,
                send_evictions=send_evicts(options),
                clk_domain=system.cpu[i].clk_domain,
                ruby_system=ruby_system,
            )

            cpu_seq = RubySequencer(
                version=cpu_version,
                icache=l0i_cache,
                clk_domain=system.cpu[i].clk_domain,
                dcache=l0d_cache,
                ruby_system=ruby_system,
            )

            l0_cntrl.sequencer = cpu_seq

            l1_cache = L1Cache(
                size=options.l1d_size,
                assoc=options.l1d_assoc,
                start_index_bit=block_size_bits,
                is_icache=False,
            )

            l1_cntrl = L1Cache_Controller(
                version=cpu_version,
                cache=l1_cache,
                l2_select_num_bits=l2_bits,
                cluster_id=i,
                ruby_system=ruby_system,
            )

            # Plain attribute assignment under a computed name; no need
            # for exec.
            setattr(ruby_system, "l0_cntrl%d" % cpu_version, l0_cntrl)
            setattr(ruby_system, "l1_cntrl%d" % cpu_version, l1_cntrl)

            #
            # Add controllers and sequencers to the appropriate lists
            #
            cpu_sequencers.append(cpu_seq)
            l0_cntrl_nodes.append(l0_cntrl)
            l1_cntrl_nodes.append(l1_cntrl)

            # Connect the L0 and L1 controllers
            l0_cntrl.mandatoryQueue = MessageBuffer()
            l0_cntrl.bufferToL1 = MessageBuffer(ordered=True)
            l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1
            l0_cntrl.bufferFromL1 = MessageBuffer(ordered=True)
            l1_cntrl.bufferToL0 = l0_cntrl.bufferFromL1

            # Connect the L1 controllers and the network
            l1_cntrl.requestToL2 = MessageBuffer()
            l1_cntrl.requestToL2.master = ruby_system.network.slave
            l1_cntrl.responseToL2 = MessageBuffer()
            l1_cntrl.responseToL2.master = ruby_system.network.slave
            l1_cntrl.unblockToL2 = MessageBuffer()
            l1_cntrl.unblockToL2.master = ruby_system.network.slave

            l1_cntrl.requestFromL2 = MessageBuffer()
            l1_cntrl.requestFromL2.slave = ruby_system.network.master
            l1_cntrl.responseFromL2 = MessageBuffer()
            l1_cntrl.responseFromL2.slave = ruby_system.network.master

        for j in xrange(num_l2caches_per_cluster):
            # Global index of this L2 cache across all clusters.
            l2_version = i * num_l2caches_per_cluster + j

            l2_cache = L2Cache(size=options.l2_size,
                               assoc=options.l2_assoc,
                               start_index_bit=l2_index_start)

            l2_cntrl = L2Cache_Controller(
                version=l2_version,
                L2cache=l2_cache,
                cluster_id=i,
                transitions_per_cycle=options.ports,
                ruby_system=ruby_system,
            )

            setattr(ruby_system, "l2_cntrl%d" % l2_version, l2_cntrl)
            l2_cntrl_nodes.append(l2_cntrl)

            # Connect the L2 controllers and the network
            l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
            l2_cntrl.DirRequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
            l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.responseFromL2Cache = MessageBuffer()
            l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

            l2_cntrl.unblockToL2Cache = MessageBuffer()
            l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master
            l2_cntrl.L1RequestToL2Cache = MessageBuffer()
            l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
            l2_cntrl.responseToL2Cache = MessageBuffer()
            l2_cntrl.responseToL2Cache.slave = ruby_system.network.master

    phys_mem_size = sum(r.size() for r in system.mem_ranges)
    assert phys_mem_size % options.num_dirs == 0
    mem_module_size = phys_mem_size // options.num_dirs

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    for i in xrange(options.num_dirs):
        #
        # Create the Ruby objects associated with the directory controller
        #
        dir_size = MemorySize("0B")
        dir_size.value = mem_module_size

        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(version=i, size=dir_size),
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system,
        )

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system)

        dma_cntrl = DMA_Controller(
            version=i,
            dma_sequencer=dma_seq,
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system,
        )

        dma_cntrl_name = "dma_cntrl%d" % i
        setattr(ruby_system, dma_cntrl_name, dma_cntrl)
        # Bind the port through the attribute just stored on ruby_system,
        # following the same lookup path the original exec string used.
        getattr(ruby_system, dma_cntrl_name).dma_sequencer.slave = dma_port
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave

    all_cntrls = (l0_cntrl_nodes + l1_cntrl_nodes + l2_cntrl_nodes +
                  dir_cntrl_nodes + dma_cntrl_nodes)

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]

    ruby_system.network.number_of_virtual_networks = 3
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Instantiate the Ruby controllers for the MOESI_hammer protocol.

    Creates one L1 controller (with private L1I, L1D and L2 caches) and a
    sequencer per CPU, one directory controller with a probe filter per
    directory and one DMA controller per entry of ``dma_ports``.
    Controllers are attached to ``ruby_system`` under numbered attribute
    names and their message buffers are connected to
    ``ruby_system.network``.

    Args:
        options: parsed options; read here for cache geometry, controller
            counts, probe-filter/directory flags, ``numa_high_bit``,
            ``recycle_latency``, ``ports`` and, when present,
            ``total_mem_size``.
        full_system: when true, an additional DMA controller is created
            and stored as ``ruby_system.io_controller``.
        system: object whose ``cpu`` list is read, and whose ``mem_ranges``
            are read when ``options`` has no ``total_mem_size``.
        dma_ports: iterable of ports, each passed as ``slave`` to a new
            DMA sequencer.
        ruby_system: object that receives the controllers and provides
            ``network`` and ``clk_domain``.

    Returns:
        The tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MOESI_hammer':
        panic("This script requires the MOESI_hammer protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in xrange(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits)
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=block_size_bits)

        l1_cntrl = L1Cache_Controller(
            version=i,
            L1Icache=l1i_cache,
            L1Dcache=l1d_cache,
            L2cache=l2_cache,
            no_mig_atomic=not options.allow_atomic_migration,
            send_evictions=send_evicts(options),
            transitions_per_cycle=options.ports,
            clk_domain=system.cpu[i].clk_domain,
            ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                clk_domain=system.cpu[i].clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        if options.recycle_latency:
            l1_cntrl.recycle_latency = options.recycle_latency

        # Plain attribute assignment under a computed name; no need for exec.
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controller and the network
        # Connect the buffers from the controller to network
        l1_cntrl.requestFromCache = MessageBuffer()
        l1_cntrl.requestFromCache.master = ruby_system.network.slave
        l1_cntrl.responseFromCache = MessageBuffer()
        l1_cntrl.responseFromCache.master = ruby_system.network.slave
        l1_cntrl.unblockFromCache = MessageBuffer()
        l1_cntrl.unblockFromCache.master = ruby_system.network.slave

        l1_cntrl.triggerQueue = MessageBuffer()

        # Connect the buffers from the network to the controller
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.forwardToCache = MessageBuffer()
        l1_cntrl.forwardToCache.slave = ruby_system.network.master
        l1_cntrl.responseToCache = MessageBuffer()
        l1_cntrl.responseToCache.slave = ruby_system.network.master

    # if total-mem-size is set (defined gem5-gpu options) then we ignore
    # the benchmark settings
    try:
        phys_mem_size = AddrRange(options.total_mem_size).size()
    except AttributeError:
        phys_mem_size = sum(r.size() for r in system.mem_ranges)

    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid Python 3 syntax.
    print("Number of dirs is %s" % options.num_dirs)
    assert phys_mem_size % options.num_dirs == 0
    # The assert above guarantees exact divisibility.
    mem_module_size = phys_mem_size // options.num_dirs

    #
    # determine size and index bits for probe filter
    # By default, the probe filter size is configured to be twice the
    # size of the L2 cache.
    #
    pf_size = MemorySize(options.l2_size)
    pf_size.value = pf_size.value * 2
    dir_bits = int(math.log(options.num_dirs, 2))
    pf_bits = int(math.log(pf_size.value, 2))
    if options.numa_high_bit:
        if options.pf_on or options.dir_on:
            # if numa high bit explicitly set, make sure it does not overlap
            # with the probe filter index
            assert options.numa_high_bit - dir_bits > pf_bits

        # set the probe filter start bit to just above the block offset
        pf_start_bit = block_size_bits
    elif dir_bits > 0:
        pf_start_bit = dir_bits + block_size_bits - 1
    else:
        pf_start_bit = block_size_bits

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    for i in xrange(options.num_dirs):
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size

        pf = ProbeFilter(size=pf_size, assoc=4,
                         start_index_bit=pf_start_bit)

        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(version=i, size=dir_size),
            probeFilter=pf,
            probe_filter_enabled=options.pf_on,
            full_bit_dir_enabled=options.dir_on,
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        if options.recycle_latency:
            dir_cntrl.recycle_latency = options.recycle_latency

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controller to the network
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)

        dir_cntrl.unblockToDir = MessageBuffer()
        dir_cntrl.unblockToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl_nodes.append(dma_cntrl)

        if options.recycle_latency:
            dma_cntrl.recycle_latency = options.recycle_latency

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.mandatoryQueue = MessageBuffer()

    all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave
        io_controller.mandatoryQueue = MessageBuffer()

        all_cntrls = all_cntrls + [io_controller]

    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Instantiate the Ruby controllers for a VI_hammer protocol build.

    Creates one L1 controller (with private L1I, L1D and L2 caches) and a
    sequencer per CPU, one directory controller with a probe filter per
    directory and one DMA controller per entry of ``dma_ports``.
    Controllers are attached to ``ruby_system`` under numbered attribute
    names and their message buffers are connected to
    ``ruby_system.network``.  Only the L1 controllers are added to the
    ``Cluster`` topology here; directory and DMA controllers are returned
    as lists.

    Side effect: sets ``options.access_backing_store`` to ``True``.

    Args:
        options: parsed options; read here for cache geometry, controller
            counts, ``total_mem_size``, probe-filter/directory flags,
            ``numa_high_bit``, ``recycle_latency`` and ``ports``.
        full_system: when true, an additional DMA controller is created,
            stored as ``ruby_system.io_controller`` and appended to the
            returned DMA controller list.
        system: not read by this function.
        dma_ports: iterable of ports; each is bound to the ``slave`` port of
            a new DMA sequencer.
        ruby_system: object that receives the controllers and provides
            ``network``.

    Returns:
        The tuple
        ``(cpu_sequencers, dir_cntrl_nodes, dma_cntrl_nodes, topology)``.
    """
    if 'VI_hammer' not in buildEnv['PROTOCOL']:
        panic("This script requires the VI_hammer protocol to be built.")

    options.access_backing_store = True

    cpu_sequencers = []

    topology = Cluster(intBW=32, extBW=32)

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    # Round log2(num_l2caches) up to a whole number of bits.  The value is
    # not read again in this function; the computation is kept so invalid
    # options still fail here as before.
    l2_bits_float = math.log(options.num_l2caches, 2)
    l2_bits = int(l2_bits_float)
    if l2_bits_float > l2_bits:
        l2_bits += 1
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in xrange(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits)
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=block_size_bits)

        l1_cntrl = L1Cache_Controller(
            version=i,
            L1Icache=l1i_cache,
            L1Dcache=l1d_cache,
            L2cache=l2_cache,
            no_mig_atomic=not options.allow_atomic_migration,
            send_evictions=send_evicts(options),
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        if options.recycle_latency:
            l1_cntrl.recycle_latency = options.recycle_latency

        # Plain attribute assignment under a computed name; no need for exec.
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(cpu_seq)
        topology.add(l1_cntrl)

        # Connect the L1 controller and the network
        # Connect the buffers from the controller to network
        l1_cntrl.requestFromCache = MessageBuffer()
        l1_cntrl.requestFromCache.master = ruby_system.network.slave
        l1_cntrl.responseFromCache = MessageBuffer()
        l1_cntrl.responseFromCache.master = ruby_system.network.slave
        l1_cntrl.unblockFromCache = MessageBuffer()
        l1_cntrl.unblockFromCache.master = ruby_system.network.slave

        # Connect the buffers from the network to the controller
        l1_cntrl.forwardToCache = MessageBuffer()
        l1_cntrl.forwardToCache.slave = ruby_system.network.master
        l1_cntrl.responseToCache = MessageBuffer()
        l1_cntrl.responseToCache.slave = ruby_system.network.master

        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.triggerQueue = MessageBuffer()

    cpu_mem_range = AddrRange(options.total_mem_size)
    # Floor division: identical to the Python 2 integer '/' used before.
    mem_module_size = cpu_mem_range.size() // options.num_dirs

    #
    # determine size and index bits for probe filter
    # By default, the probe filter size is configured to be twice the
    # size of the L2 cache.
    #
    pf_size = MemorySize(options.l2_size)
    pf_size.value = pf_size.value * 2
    dir_bits = int(math.log(options.num_dirs, 2))
    pf_bits = int(math.log(pf_size.value, 2))
    if options.numa_high_bit:
        if options.pf_on or options.dir_on:
            # if numa high bit explicitly set, make sure it does not overlap
            # with the probe filter index
            assert options.numa_high_bit - dir_bits > pf_bits

        # set the probe filter start bit to just above the block offset
        pf_start_bit = block_size_bits
    elif dir_bits > 0:
        pf_start_bit = dir_bits + block_size_bits - 1
    else:
        pf_start_bit = block_size_bits

    dir_cntrl_nodes = []
    for i in xrange(options.num_dirs):
        #
        # Create the Ruby objects associated with the directory controller
        #
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size

        pf = ProbeFilter(size=pf_size, assoc=4,
                         start_index_bit=pf_start_bit)

        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(
                version=i,
                size=dir_size,
                numa_high_bit=options.numa_high_bit),
            probeFilter=pf,
            probe_filter_enabled=options.pf_on,
            full_bit_dir_enabled=options.dir_on,
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        if options.recycle_latency:
            dir_cntrl.recycle_latency = options.recycle_latency

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controller to the network
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave

        dir_cntrl.unblockToDir = MessageBuffer()
        dir_cntrl.unblockToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.responseFromMemory = MessageBuffer()

    dma_cntrl_nodes = []
    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        dma_cntrl_name = "dma_cntrl%d" % i
        setattr(ruby_system, dma_cntrl_name, dma_cntrl)
        # Bind the port through the attribute just stored on ruby_system,
        # following the same lookup path the original exec string used.
        getattr(ruby_system, dma_cntrl_name).dma_sequencer.slave = dma_port
        dma_cntrl_nodes.append(dma_cntrl)

        if options.recycle_latency:
            dma_cntrl.recycle_latency = options.recycle_latency

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.mandatoryQueue = MessageBuffer()

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       transitions_per_cycle=options.ports,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave
        io_controller.mandatoryQueue = MessageBuffer()

        dma_cntrl_nodes.append(io_controller)

    return (cpu_sequencers, dir_cntrl_nodes, dma_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Instantiate the MESI_Two_Level Ruby controllers and their topology.

    Creates network-port, L1, L2, directory and DMA controllers (plus one
    extra DMA controller for IO when ``full_system`` is true), attaches each
    of them to ``ruby_system`` under a numbered attribute and connects it to
    ``ruby_system.network``.

    Args:
        options: parsed command-line options (cache sizes/associativities,
            controller counts, ``ports``, ...).
        full_system: when true, an IO DMA sequencer/controller pair is
            created and the sequencer is stored in ``ruby_system._io_port``.
        system: supplies ``cpu[0].clk_domain`` and ``mem_ranges``.
        dma_ports: one DMA controller is created per entry.
        ruby_system: the Ruby system that receives the controllers.

    Returns:
        The tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MESI_Two_Level':
        fatal("This script requires the MESI_Two_Level protocol to be built.")

    ruby_system.num_simics_net_ports = options.num_networkports
    ruby_system.num_accelerators = options.accelerators
    ruby_system.num_TDs = options.num_tds

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    netport_cntrl_nodes = []
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    l2_bits = int(math.log(options.num_l2caches, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))

    # NOTE(review): the MemorySize arithmetic is deliberately left on the
    # original "/" operator; which division operators MemorySize implements
    # is not visible from this file.
    l2_cache_sets = MemorySize(
        options.l2_size) / options.cacheline_size / options.l2_assoc
    l2_cache_set_bits = int(math.log(l2_cache_sets, 2))
    # Parenthesised single-argument print behaves the same as a statement
    # (Python 2) and as a function call (Python 3).
    print("l2_cache_set_bits = %d" % l2_cache_set_bits)

    assert (options.num_networkports == options.num_l2caches)
    # Round the L1 controller count up to a multiple of the network-port
    # count.  Floor division keeps the result an integer on Python 3 as well.
    num_l1_cntrls = (
        (options.accelerators + options.num_tds
         + options.num_networkports - 1)
        // options.num_networkports) * options.num_networkports
    print("num_l1_cntrls = %d" % num_l1_cntrls)
    assert (num_l1_cntrls >= (options.accelerators + options.num_tds))

    for i in range(options.num_networkports):
        # First create the Ruby objects associated with
        # the CPU and Accelerator signal communication
        netport_cntrl = gem5NetworkPortInterface_Controller(
            version=i,
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        setattr(ruby_system, "netport_cntrl%d" % i, netport_cntrl)
        netport_cntrl_nodes.append(netport_cntrl)

        # Connect the netport controller to the network
        netport_cntrl.messageOut = ruby_system.network.slave
        netport_cntrl.messageIn = ruby_system.network.master

    for i in range(num_l1_cntrls):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=False)

        prefetcher = RubyPrefetcher.Prefetcher()

        l1_cntrl = L1Cache_Controller(
            version=i,
            L1Icache=l1i_cache,
            L1Dcache=l1d_cache,
            l2_select_num_bits=l2_bits,
            l2_select_low_bit=(block_size_bits + l2_cache_set_bits),
            send_evictions=send_evicts(options),
            prefetcher=prefetcher,
            ruby_system=ruby_system,
            clk_domain=system.cpu[0].clk_domain,
            transitions_per_cycle=options.ports,
            enable_prefetch=False)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                clk_domain=system.cpu[0].clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists.  Only the
        # first num_cpus sequencers are handed back to the caller; every L1
        # controller is still part of the topology.
        if len(cpu_sequencers) < options.num_cpus:
            cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network
        l1_cntrl.requestFromL1Cache = ruby_system.network.slave
        l1_cntrl.responseFromL1Cache = ruby_system.network.slave
        l1_cntrl.unblockFromL1Cache = ruby_system.network.slave

        l1_cntrl.requestToL1Cache = ruby_system.network.master
        l1_cntrl.responseToL1Cache = ruby_system.network.master

    l2_index_start = block_size_bits

    for i in range(options.num_l2caches):
        #
        # First create the Ruby objects associated with this L2 bank
        #
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=l2_index_start)

        l2_cntrl = L2Cache_Controller(version=i,
                                      L2cache=l2_cache,
                                      transitions_per_cycle=options.ports,
                                      ruby_system=ruby_system)

        setattr(ruby_system, "l2_cntrl%d" % i, l2_cntrl)
        l2_cntrl_nodes.append(l2_cntrl)

        # Connect the L2 controllers and the network
        l2_cntrl.DirRequestFromL2Cache = ruby_system.network.slave
        l2_cntrl.L1RequestFromL2Cache = ruby_system.network.slave
        l2_cntrl.responseFromL2Cache = ruby_system.network.slave

        l2_cntrl.unblockToL2Cache = ruby_system.network.master
        l2_cntrl.L1RequestToL2Cache = ruby_system.network.master
        l2_cntrl.responseToL2Cache = ruby_system.network.master

    phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges))
    assert (phys_mem_size % options.num_dirs == 0)
    # Exact because of the assert above; "//" keeps the value integral.
    mem_module_size = phys_mem_size // options.num_dirs

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    for i in range(options.num_dirs):
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size

        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(version=i, size=dir_size),
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = ruby_system.network.master
        dir_cntrl.responseToDir = ruby_system.network.master
        dir_cntrl.responseFromDir = ruby_system.network.slave

    for i, dma_port in enumerate(dma_ports):
        # Create the Ruby objects associated with the dma controller
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = ruby_system.network.master
        dma_cntrl.requestToDir = ruby_system.network.slave

    all_cntrls = netport_cntrl_nodes + \
                 l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.responseFromDir = ruby_system.network.master
        io_controller.requestToDir = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]

    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Instantiate the CPU-side VI_hammer Ruby controllers.

    Creates one L1 controller per CPU, the directory controllers (each with
    its own probe filter) and the DMA controllers, attaches them to
    ``ruby_system`` under numbered attributes and wires their message
    buffers to ``ruby_system.network``.  Only the L1 controllers are added
    to the returned ``Cluster``; directory and DMA controllers are handed
    back as lists.

    Args:
        options: parsed command-line options.  ``access_backing_store`` is
            forced to ``True`` on this object.
        full_system: when true, an IO DMA sequencer/controller pair is
            created, stored in ``ruby_system._io_port`` /
            ``ruby_system.io_controller`` and appended to the DMA list.
        system: not read by this function.
        dma_ports: one DMA controller is created per entry.
        ruby_system: the Ruby system that receives the controllers.

    Returns:
        The tuple
        ``(cpu_sequencers, dir_cntrl_nodes, dma_cntrl_nodes, topology)``.
    """
    if 'VI_hammer' not in buildEnv['PROTOCOL']:
        panic("This script requires the VI_hammer protocol to be built.")

    options.access_backing_store = True

    cpu_sequencers = []

    topology = Cluster(intBW=68, extBW=68)

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    # NOTE(review): l2_bits (ceil(log2(num_l2caches))) is not read again in
    # this function; the computation is kept so its evaluation is unchanged.
    l2_bits_float = math.log(options.num_l2caches, 2)
    l2_bits = int(l2_bits_float)
    if l2_bits_float > l2_bits:
        l2_bits += 1
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits)
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=block_size_bits)

        l1_cntrl = L1Cache_Controller(
            version=i,
            L1Icache=l1i_cache,
            L1Dcache=l1d_cache,
            L2cache=l2_cache,
            no_mig_atomic=not options.allow_atomic_migration,
            send_evictions=send_evicts(options),
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        if options.recycle_latency:
            l1_cntrl.recycle_latency = options.recycle_latency

        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(cpu_seq)
        topology.add(l1_cntrl)

        # Connect the L1 controller and the network
        # Connect the buffers from the controller to network
        l1_cntrl.requestFromCache = MessageBuffer()
        l1_cntrl.requestFromCache.master = ruby_system.network.slave
        l1_cntrl.responseFromCache = MessageBuffer()
        l1_cntrl.responseFromCache.master = ruby_system.network.slave
        l1_cntrl.unblockFromCache = MessageBuffer()
        l1_cntrl.unblockFromCache.master = ruby_system.network.slave

        # Connect the buffers from the network to the controller
        l1_cntrl.forwardToCache = MessageBuffer()
        l1_cntrl.forwardToCache.slave = ruby_system.network.master
        l1_cntrl.responseToCache = MessageBuffer()
        l1_cntrl.responseToCache.slave = ruby_system.network.master

        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.triggerQueue = MessageBuffer()

    cpu_mem_range = AddrRange(options.total_mem_size)
    # Floor division gives the same result as "/" on Python 2 integers and
    # stays integral on Python 3.
    mem_module_size = cpu_mem_range.size() // options.num_dirs

    #
    # determine size and index bits for probe filter
    # By default, the probe filter size is configured to be twice the
    # size of the L2 cache.
    #
    pf_size = MemorySize(options.l2_size)
    pf_size.value = pf_size.value * 2
    dir_bits = int(math.log(options.num_dirs, 2))
    pf_bits = int(math.log(pf_size.value, 2))
    if options.numa_high_bit:
        if options.pf_on or options.dir_on:
            # if numa high bit explicitly set, make sure it does not overlap
            # with the probe filter index
            assert (options.numa_high_bit - dir_bits > pf_bits)

        # set the probe filter start bit to just above the block offset
        pf_start_bit = block_size_bits
    else:
        if dir_bits > 0:
            pf_start_bit = dir_bits + block_size_bits - 1
        else:
            pf_start_bit = block_size_bits

    dir_cntrl_nodes = []
    for i in range(options.num_dirs):
        #
        # Create the Ruby objects associated with the directory controller
        #
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size

        pf = ProbeFilter(size=pf_size, assoc=4,
                         start_index_bit=pf_start_bit)

        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(
                version=i,
                size=dir_size,
                numa_high_bit=options.numa_high_bit),
            probeFilter=pf,
            probe_filter_enabled=options.pf_on,
            full_bit_dir_enabled=options.dir_on,
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        if options.recycle_latency:
            dir_cntrl.recycle_latency = options.recycle_latency

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controller to the network
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave

        dir_cntrl.unblockToDir = MessageBuffer()
        dir_cntrl.unblockToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.responseFromMemory = MessageBuffer()

    dma_cntrl_nodes = []
    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        dma_cntrl_name = "dma_cntrl%d" % i
        setattr(ruby_system, dma_cntrl_name, dma_cntrl)
        # Bind the port through the attribute that was just attached, the
        # same access path the original exec string used.
        getattr(ruby_system, dma_cntrl_name).dma_sequencer.slave = dma_port
        dma_cntrl_nodes.append(dma_cntrl)

        if options.recycle_latency:
            dma_cntrl.recycle_latency = options.recycle_latency

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.mandatoryQueue = MessageBuffer()

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       transitions_per_cycle=options.ports,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave
        io_controller.mandatoryQueue = MessageBuffer()

        dma_cntrl_nodes.append(io_controller)

    return (cpu_sequencers, dir_cntrl_nodes, dma_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):
    """Build the MOESI_hammer Ruby controllers and the network topology.

    One L1 controller (with private L1I/L1D/L2 caches) is created per CPU,
    the directory controllers come from ``create_directories`` and each gets
    its own probe filter, and one DMA controller is created per DMA port
    (plus an IO DMA controller when ``full_system`` is true).

    Returns:
        The tuple ``(cpu_sequencers, mem_dir_cntrl_nodes, topology)``; the
        optional ROM directory controller is part of the topology but not of
        the returned directory list.
    """
    if buildEnv['PROTOCOL'] != 'MOESI_hammer':
        panic("This script requires the MOESI_hammer protocol to be built.")

    network = ruby_system.network

    cpu_sequencers = []

    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list: L1 controllers first, then directory
    # controllers, then DMA controllers.
    l1_cntrl_nodes = []
    dma_cntrl_nodes = []

    # Controllers have to exist before the network so that their
    # constructors run ahead of the network constructor.
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for cpu_idx in range(options.num_cpus):
        # Per-CPU cache hierarchy.
        icache = L1Cache(size=options.l1i_size,
                         assoc=options.l1i_assoc,
                         start_index_bit=block_size_bits,
                         is_icache=True)
        dcache = L1Cache(size=options.l1d_size,
                         assoc=options.l1d_assoc,
                         start_index_bit=block_size_bits)
        private_l2 = L2Cache(size=options.l2_size,
                             assoc=options.l2_assoc,
                             start_index_bit=block_size_bits)

        # The ruby random tester reuses num_cpus for the number of cpu ports
        # on its single tester object, so num_cpus may exceed
        # len(system.cpu).  With exactly one entry in system.cpu, always use
        # entry 0 to avoid indexing past the end of the list.
        cpu_clk = system.cpu[0 if len(system.cpu) == 1
                             else cpu_idx].clk_domain

        l1_ctrl = L1Cache_Controller(
            version=cpu_idx,
            L1Icache=icache,
            L1Dcache=dcache,
            L2cache=private_l2,
            no_mig_atomic=not options.allow_atomic_migration,
            send_evictions=send_evicts(options),
            transitions_per_cycle=options.ports,
            clk_domain=cpu_clk,
            ruby_system=ruby_system)

        sequencer = RubySequencer(version=cpu_idx,
                                  icache=icache,
                                  dcache=dcache,
                                  clk_domain=cpu_clk,
                                  ruby_system=ruby_system)

        l1_ctrl.sequencer = sequencer
        if options.recycle_latency:
            l1_ctrl.recycle_latency = options.recycle_latency

        setattr(ruby_system, "l1_cntrl%d" % cpu_idx, l1_ctrl)

        # Record the new objects.
        cpu_sequencers.append(sequencer)
        l1_cntrl_nodes.append(l1_ctrl)

        # Buffers going from the controller into the network.
        l1_ctrl.requestFromCache = MessageBuffer()
        l1_ctrl.requestFromCache.master = network.slave
        l1_ctrl.responseFromCache = MessageBuffer()
        l1_ctrl.responseFromCache.master = network.slave
        l1_ctrl.unblockFromCache = MessageBuffer()
        l1_ctrl.unblockFromCache.master = network.slave

        l1_ctrl.triggerQueue = MessageBuffer()

        # Buffers coming from the network into the controller.
        l1_ctrl.mandatoryQueue = MessageBuffer()

        l1_ctrl.forwardToCache = MessageBuffer()
        l1_ctrl.forwardToCache.slave = network.master
        l1_ctrl.responseToCache = MessageBuffer()
        l1_ctrl.responseToCache.slave = network.master

    # Probe filter sizing: by default twice the size of the L2 cache.
    pf_size = MemorySize(options.l2_size)
    pf_size.value = pf_size.value * 2
    dir_bits = int(math.log(options.num_dirs, 2))
    pf_bits = int(math.log(pf_size.value, 2))

    if options.numa_high_bit:
        if options.pf_on or options.dir_on:
            # An explicitly set numa high bit must not overlap with the
            # probe filter index.
            assert (options.numa_high_bit - dir_bits > pf_bits)

        # Start the probe filter index just above the block offset.
        pf_start_bit = block_size_bits
    elif dir_bits > 0:
        pf_start_bit = dir_bits + block_size_bits - 1
    else:
        pf_start_bit = block_size_bits

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system.  clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    # Work on a copy so the list returned to the caller stays memory-only.
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)

    for directory in dir_cntrl_nodes:
        directory.probeFilter = ProbeFilter(size=pf_size, assoc=4,
                                            start_index_bit=pf_start_bit)
        directory.probe_filter_enabled = options.pf_on
        directory.full_bit_dir_enabled = options.dir_on

        if options.recycle_latency:
            directory.recycle_latency = options.recycle_latency

        # Directory -> network.
        directory.forwardFromDir = MessageBuffer()
        directory.forwardFromDir.master = network.slave
        directory.responseFromDir = MessageBuffer()
        directory.responseFromDir.master = network.slave
        directory.dmaResponseFromDir = MessageBuffer(ordered=True)
        directory.dmaResponseFromDir.master = network.slave

        directory.triggerQueue = MessageBuffer(ordered=True)

        # Network -> directory.
        directory.unblockToDir = MessageBuffer()
        directory.unblockToDir.slave = network.master
        directory.responseToDir = MessageBuffer()
        directory.responseToDir.slave = network.master
        directory.requestToDir = MessageBuffer()
        directory.requestToDir.slave = network.master
        directory.dmaRequestToDir = MessageBuffer(ordered=True)
        directory.dmaRequestToDir.slave = network.master
        directory.responseFromMemory = MessageBuffer()

    for dma_idx, dma_port in enumerate(dma_ports):
        # DMA sequencer/controller pair for this port.
        dma_sequencer = DMASequencer(version=dma_idx,
                                     ruby_system=ruby_system,
                                     slave=dma_port)

        dma_ctrl = DMA_Controller(version=dma_idx,
                                  dma_sequencer=dma_sequencer,
                                  transitions_per_cycle=options.ports,
                                  ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % dma_idx, dma_ctrl)
        dma_cntrl_nodes.append(dma_ctrl)

        if options.recycle_latency:
            dma_ctrl.recycle_latency = options.recycle_latency

        # Hook the DMA controller up to the network.
        dma_ctrl.responseFromDir = MessageBuffer(ordered=True)
        dma_ctrl.responseFromDir.slave = network.master
        dma_ctrl.requestToDir = MessageBuffer()
        dma_ctrl.requestToDir.master = network.slave
        dma_ctrl.mandatoryQueue = MessageBuffer()

    all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes

    # Full-system runs get one more DMA controller acting as the IO port.
    if full_system:
        io_version = len(dma_ports)
        io_seq = DMASequencer(version=io_version, ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=io_version,
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Hook the IO controller up to the network.
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = network.slave
        io_controller.mandatoryQueue = MessageBuffer()

        all_cntrls.append(io_controller)

    network.number_of_virtual_networks = 6
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Instantiate the MI_example Ruby controllers and their topology.

    Creates one L1 controller per CPU (a single unified cache serves as both
    icache and dcache of the sequencer), the directory controllers and one
    DMA controller per DMA port, attaches them to ``ruby_system`` under
    numbered attributes and wires their message buffers to
    ``ruby_system.network``.

    Args:
        options: parsed command-line options.
        full_system: when true, an IO DMA sequencer/controller pair is
            created; the sequencer is stored in ``ruby_system._io_port``.
        system: supplies ``cpu[i].clk_domain`` and ``mem_ranges``.
        dma_ports: one DMA controller is created per entry.
        ruby_system: the Ruby system that receives the controllers.

    Returns:
        The tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MI_example':
        panic("This script requires the MI_example protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l1_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        # Only one cache exists for this protocol, so by default use the L1D
        # config parameters.
        #
        cache = L1Cache(size=options.l1d_size,
                        assoc=options.l1d_assoc,
                        start_index_bit=block_size_bits)

        #
        # Only one unified L1 cache exists.  Can cache instructions and data.
        #
        l1_cntrl = L1Cache_Controller(version=i,
                                      cacheMemory=cache,
                                      send_evictions=send_evicts(options),
                                      transitions_per_cycle=options.ports,
                                      clk_domain=system.cpu[i].clk_domain,
                                      ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i,
                                icache=cache,
                                dcache=cache,
                                clk_domain=system.cpu[i].clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.requestFromCache = MessageBuffer(ordered=True)
        l1_cntrl.requestFromCache.master = ruby_system.network.slave
        l1_cntrl.responseFromCache = MessageBuffer(ordered=True)
        l1_cntrl.responseFromCache.master = ruby_system.network.slave
        l1_cntrl.forwardToCache = MessageBuffer(ordered=True)
        l1_cntrl.forwardToCache.slave = ruby_system.network.master
        l1_cntrl.responseToCache = MessageBuffer(ordered=True)
        l1_cntrl.responseToCache.slave = ruby_system.network.master

    phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges))
    assert (phys_mem_size % options.num_dirs == 0)
    # Exact because of the assert above; "//" keeps the value integral on
    # Python 3 and matches "/" on Python 2 integers.
    mem_module_size = phys_mem_size // options.num_dirs

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system.
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    for i in range(options.num_dirs):
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size
        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(version=i, size=dir_size),
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer(ordered=True)
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master

        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        dma_cntrl_name = "dma_cntrl%d" % i
        setattr(ruby_system, dma_cntrl_name, dma_cntrl)
        # Bind the port through the attribute that was just attached, the
        # same access path the original exec string used.
        getattr(ruby_system, dma_cntrl_name).dma_sequencer.slave = dma_port
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller and the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master

    all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master

        all_cntrls = all_cntrls + [io_controller]

    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_devices, ruby_system):
    """Build the Ruby controllers for a fused CPU + GPU (GPGPU-Sim) system.

    The CPU side (CPU cluster, directories, DMA controllers) is delegated to
    ``VI_hammer.create_system``.  This function then adds, in order:

    * two GPU L1 controllers (data and texture) per shader core,
    * ``options.gpu_num_l2caches`` GPU L2 controllers, each in its own
      bandwidth-limited cluster,
    * a pagewalk-cache controller (a CPU L1 controller),
    * the copy-engine controller in its own bandwidth-limited cluster,
    * the Z-cache controller,
    * one CPU L1 controller per entry of ``system.datapaths`` when
      ``options.accel_cfg_file`` is set.

    Args:
        options: parsed command-line options (cache sizes, counts, latencies).
        full_system: forwarded unchanged to ``VI_hammer.create_system``.
        system: the simulated system; ``system.datapaths`` is read only when
            ``options.accel_cfg_file`` is set.
        dma_devices: forwarded unchanged to ``VI_hammer.create_system``.
        ruby_system: the Ruby system the controllers are attached to; its
            ``network`` ports are connected to every message buffer created
            here.

    Returns:
        A tuple ``(all_sequencers, dir_cntrls, complete_cluster)`` where
        ``all_sequencers`` is the CPU sequencer list extended with every
        sequencer created here, ``dir_cntrls`` is the directory controller
        list returned by the CPU protocol script, and ``complete_cluster`` is
        the top-level ``Cluster`` containing everything.
    """
    if not buildEnv['GPGPU_SIM']:
        m5.util.panic("This script requires GPGPU-Sim integration to be built.")

    # Run the protocol script to setup CPU cluster, directory and DMA
    (all_sequencers, dir_cntrls, dma_cntrls, cpu_cluster) = \
        VI_hammer.create_system(options, full_system, system, dma_devices,
                                ruby_system)

    #
    # Build GPU cluster
    #
    # Empirically, Fermi per-core bandwidth peaks at roughly 23GB/s
    # (32B/cycle @ 772MHz). Use ~16B per Ruby cycle to match this. Maxwell
    # per-core bandwidth peaks at 40GB/s (42B/cycle @ 1029MHz). Use ~24B per
    # Ruby cycle to match this.
    if options.gpu_core_config == 'Fermi':
        l1_cluster_bw = 16
    elif options.gpu_core_config == 'Maxwell':
        l1_cluster_bw = 24
    elif options.gpu_core_config == 'Tegra':
        # FIXME using Fermi for now
        l1_cluster_bw = 16
    else:
        m5.util.fatal("Unknown GPU core config: %s" % options.gpu_core_config)

    gpu_cluster = Cluster(intBW=l1_cluster_bw, extBW=l1_cluster_bw)
    gpu_cluster.disableConnectToParent()

    l2_bits = int(math.log(options.gpu_num_l2caches, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))

    # This represents the L1 to L2 interconnect latency
    # NOTES! 1) This latency is in Ruby (cache) cycles, not SM cycles
    #        2) Since the cluster interconnect doesn't model multihop
    #           latencies, model these latencies with the controller latency
    #           variables. If the interconnect model is changed, latencies
    #           will need to be adjusted for reasonable total memory access
    #           delay.
    per_hop_interconnect_latency = 45  # ~15 GPU cycles
    num_dance_hall_hops = int(math.log(options.num_sc, 2))
    if num_dance_hall_hops == 0:
        num_dance_hall_hops = 1
    l1_to_l2_noc_latency = per_hop_interconnect_latency * num_dance_hall_hops

    #
    # Caches for GPU cores
    #
    for i in xrange(options.num_sc):
        #
        # First create the Ruby objects associated with the GPU cores
        #
        data_cache = L1Cache(size=options.sc_l1_size,
                             assoc=options.sc_l1_assoc,
                             replacement_policy=LRUReplacementPolicy(),
                             start_index_bit=block_size_bits,
                             dataArrayBanks=4,
                             tagArrayBanks=4,
                             dataAccessLatency=4,
                             tagAccessLatency=4,
                             resourceStalls=False)

        data_l1_cntrl = GPUL1Cache_Controller(
            version=i*2,
            cache=data_cache,
            l2_select_num_bits=l2_bits,
            num_l2=options.gpu_num_l2caches,
            transitions_per_cycle=options.ports,
            issue_latency=l1_to_l2_noc_latency,
            number_of_TBEs=options.gpu_l1_buf_depth,
            ruby_system=ruby_system)

        data_gpu_seq = RubySequencer(
            version=options.num_cpus + i*2,
            icache=data_cache,
            dcache=data_cache,
            max_outstanding_requests=options.gpu_l1_buf_depth,
            ruby_system=ruby_system,
            deadlock_threshold=2000000,
            connect_to_io=False)

        tex_cache = L1Cache(size=options.sc_tl1_size,
                            assoc=options.sc_tl1_assoc,
                            replacement_policy=LRUReplacementPolicy(),
                            start_index_bit=block_size_bits,
                            dataArrayBanks=4,
                            tagArrayBanks=4,
                            dataAccessLatency=4,
                            tagAccessLatency=4,
                            resourceStalls=False)

        tex_l1_cntrl = GPUL1Cache_Controller(
            version=i*2+1,
            cache=tex_cache,
            l2_select_num_bits=l2_bits,
            num_l2=options.gpu_num_l2caches,
            transitions_per_cycle=options.ports,
            issue_latency=l1_to_l2_noc_latency,
            number_of_TBEs=options.gpu_tl1_buf_depth,
            ruby_system=ruby_system)

        tex_gpu_seq = RubySequencer(
            version=options.num_cpus + i*2+1,
            icache=tex_cache,
            dcache=tex_cache,
            max_outstanding_requests=options.gpu_tl1_buf_depth,
            ruby_system=ruby_system,
            deadlock_threshold=2000000,
            connect_to_io=False)

        data_l1_cntrl.sequencer = data_gpu_seq
        tex_l1_cntrl.sequencer = tex_gpu_seq

        # Register both controllers as named children of the Ruby system;
        # data controllers take the even slots, texture the odd ones.
        data_i = i*2
        tex_i = i*2 + 1
        setattr(ruby_system, "l1_cntrl_sp%02d" % data_i, data_l1_cntrl)
        setattr(ruby_system, "l1_cntrl_sp%02d" % tex_i, tex_l1_cntrl)

        #
        # Add controllers and sequencers to the appropriate lists
        #
        all_sequencers.append(data_gpu_seq)
        all_sequencers.append(tex_gpu_seq)
        gpu_cluster.add(data_l1_cntrl)
        gpu_cluster.add(tex_l1_cntrl)

        # Connect the controllers to the network
        data_l1_cntrl.requestFromL1Cache = MessageBuffer(ordered=True)
        data_l1_cntrl.requestFromL1Cache.master = ruby_system.network.slave
        data_l1_cntrl.responseToL1Cache = MessageBuffer(ordered=True)
        data_l1_cntrl.responseToL1Cache.slave = ruby_system.network.master
        data_l1_cntrl.mandatoryQueue = MessageBuffer()

        tex_l1_cntrl.requestFromL1Cache = MessageBuffer(ordered=True)
        tex_l1_cntrl.requestFromL1Cache.master = ruby_system.network.slave
        tex_l1_cntrl.responseToL1Cache = MessageBuffer(ordered=True)
        tex_l1_cntrl.responseToL1Cache.slave = ruby_system.network.master
        tex_l1_cntrl.mandatoryQueue = MessageBuffer()

    l2_index_start = block_size_bits + l2_bits

    # Use L2 cache and interconnect latencies to calculate protocol latencies
    # NOTES! 1) These latencies are in Ruby (cache) cycles, not SM cycles
    #        2) Since the cluster interconnect doesn't model multihop
    #           latencies, model these latencies with the controller latency
    #           variables. If the interconnect model is changed, latencies
    #           will need to be adjusted for reasonable total memory access
    #           delay.
    l2_cache_access_latency = 30  # ~10 GPU cycles
    l2_to_l1_noc_latency = per_hop_interconnect_latency * num_dance_hall_hops
    l2_to_mem_noc_latency = 125  # ~40 GPU cycles

    # Empirically, Fermi per-L2 bank bandwidth peaks at roughly 66GB/s
    # (92B/cycle @ 772MHz). Use ~34B per Ruby cycle to match this. Maxwell
    # per-L2 bank bandwidth peaks at 123GB/s (128B/cycle @ 1029MHz). Use ~64B
    # per Ruby cycle to match this.
    # NOTE(review): the Maxwell value below is 68, not the ~64 quoted above;
    # the value is left untouched here -- confirm which one is intended.
    if options.gpu_core_config == 'Fermi':
        l2_cluster_bw = 34
    elif options.gpu_core_config == 'Maxwell':
        l2_cluster_bw = 68
    elif options.gpu_core_config == 'Tegra':
        # FIXME using Fermi configs for now
        l2_cluster_bw = 34
    else:
        m5.util.fatal("Unknown GPU core config: %s" % options.gpu_core_config)

    l2_clusters = []
    for i in xrange(options.gpu_num_l2caches):
        #
        # First create the Ruby objects associated with this GPU L2 bank
        #
        l2_cache = L2Cache(size=options.sc_l2_size,
                           assoc=options.sc_l2_assoc,
                           start_index_bit=l2_index_start,
                           replacement_policy=LRUReplacementPolicy(),
                           dataArrayBanks=4,
                           tagArrayBanks=4,
                           dataAccessLatency=4,
                           tagAccessLatency=4,
                           resourceStalls=options.gpu_l2_resource_stalls)

        l2_cntrl = GPUL2Cache_Controller(
            version=i,
            L2cache=l2_cache,
            transitions_per_cycle=options.ports,
            l2_response_latency=(l2_cache_access_latency +
                                 l2_to_l1_noc_latency),
            l2_request_latency=l2_to_mem_noc_latency,
            cache_response_latency=l2_cache_access_latency,
            ruby_system=ruby_system)

        setattr(ruby_system, "l2_cntrl%d" % i, l2_cntrl)

        # Each L2 bank lives in its own cluster so its bandwidth is limited
        # independently; the cluster is also remembered for the final
        # top-level cluster assembly below.
        l2_cluster = Cluster(intBW=l2_cluster_bw, extBW=l2_cluster_bw)
        l2_cluster.add(l2_cntrl)
        gpu_cluster.add(l2_cluster)
        l2_clusters.append(l2_cluster)

        # Connect the controller to the network
        l2_cntrl.responseToL1Cache = MessageBuffer(ordered=True)
        l2_cntrl.responseToL1Cache.master = ruby_system.network.slave
        l2_cntrl.requestFromCache = MessageBuffer()
        l2_cntrl.requestFromCache.master = ruby_system.network.slave
        l2_cntrl.responseFromCache = MessageBuffer()
        l2_cntrl.responseFromCache.master = ruby_system.network.slave
        l2_cntrl.unblockFromCache = MessageBuffer()
        l2_cntrl.unblockFromCache.master = ruby_system.network.slave

        l2_cntrl.requestFromL1Cache = MessageBuffer(ordered=True)
        l2_cntrl.requestFromL1Cache.slave = ruby_system.network.master
        l2_cntrl.forwardToCache = MessageBuffer()
        l2_cntrl.forwardToCache.slave = ruby_system.network.master
        l2_cntrl.responseToCache = MessageBuffer()
        l2_cntrl.responseToCache.slave = ruby_system.network.master
        l2_cntrl.triggerQueue = MessageBuffer()

    ##########################################################################
    # Pagewalk cache
    # NOTE: We use a CPU L1 cache controller here. This is to facilitate MMU
    #       cache coherence (as the GPU L1 caches are incoherent without
    #       flushes). The L2 cache is small, and should have minimal effect
    #       on the performance (see Section 6.2 of Power et al. HPCA 2014).
    pwd_cache = L1Cache(size=options.pwc_size,
                        assoc=options.pwc_assoc,
                        replacement_policy=options.pwc_policy,
                        start_index_bit=block_size_bits,
                        resourceStalls=False)
    # Small cache since CPU L1 requires I and D
    pwi_cache = L1Cache(size="512B",
                        assoc=2,
                        replacement_policy=LRUReplacementPolicy(),
                        start_index_bit=block_size_bits,
                        resourceStalls=False)

    # Small cache since CPU L1 controller requires L2
    l2_cache = L2Cache(size="512B",
                       assoc=2,
                       start_index_bit=block_size_bits,
                       resourceStalls=False)

    l1_cntrl = L1Cache_Controller(version=options.num_cpus,
                                  L1Icache=pwi_cache,
                                  L1Dcache=pwd_cache,
                                  L2cache=l2_cache,
                                  send_evictions=False,
                                  transitions_per_cycle=options.ports,
                                  issue_latency=l1_to_l2_noc_latency,
                                  cache_response_latency=1,
                                  l2_cache_hit_latency=1,
                                  number_of_TBEs=options.gpu_l1_buf_depth,
                                  ruby_system=ruby_system)

    cpu_seq = RubySequencer(
        version=options.num_cpus + options.num_sc*2,
        icache=pwd_cache,  # Never get data from pwi_cache
        dcache=pwd_cache,
        dcache_hit_latency=8,
        icache_hit_latency=8,
        max_outstanding_requests=options.gpu_l1_buf_depth,
        ruby_system=ruby_system,
        deadlock_threshold=2000000,
        connect_to_io=False)

    l1_cntrl.sequencer = cpu_seq

    ruby_system.l1_pw_cntrl = l1_cntrl
    all_sequencers.append(cpu_seq)

    gpu_cluster.add(l1_cntrl)

    # Connect the L1 controller and the network
    # Connect the buffers from the controller to network
    l1_cntrl.requestFromCache = MessageBuffer()
    l1_cntrl.requestFromCache.master = ruby_system.network.slave
    l1_cntrl.responseFromCache = MessageBuffer()
    l1_cntrl.responseFromCache.master = ruby_system.network.slave
    l1_cntrl.unblockFromCache = MessageBuffer()
    l1_cntrl.unblockFromCache.master = ruby_system.network.slave

    # Connect the buffers from the network to the controller
    l1_cntrl.forwardToCache = MessageBuffer()
    l1_cntrl.forwardToCache.slave = ruby_system.network.master
    l1_cntrl.responseToCache = MessageBuffer()
    l1_cntrl.responseToCache.slave = ruby_system.network.master

    l1_cntrl.mandatoryQueue = MessageBuffer()
    l1_cntrl.triggerQueue = MessageBuffer()

    #
    # Create controller for the copy engine to connect to in GPU cluster
    # Cache is unused by controller
    #
    cache = L1Cache(size="4096B", assoc=2)

    # Setting options.ce_buffering = 0 indicates that the CE can use infinite
    # buffering, but we need to specify a finite number of outstanding
    # accesses that the CE is allowed to issue. Just set it to some large
    # number greater than normal memory access latencies to ensure that the
    # sequencer could service one access per cycle.
    max_out_reqs = options.ce_buffering
    if max_out_reqs == 0:
        max_out_reqs = 1024

    gpu_ce_seq = RubySequencer(
        version=options.num_cpus + options.num_sc*2 + 1,
        icache=cache,
        dcache=cache,
        max_outstanding_requests=max_out_reqs,
        support_inst_reqs=False,
        ruby_system=ruby_system,
        connect_to_io=False)

    gpu_ce_cntrl = GPUCopyDMA_Controller(version=0,
                                         sequencer=gpu_ce_seq,
                                         transitions_per_cycle=options.ports,
                                         number_of_TBEs=max_out_reqs,
                                         ruby_system=ruby_system)

    gpu_ce_cntrl.responseFromDir = MessageBuffer(ordered=True)
    gpu_ce_cntrl.responseFromDir.slave = ruby_system.network.master
    gpu_ce_cntrl.reqToDirectory = MessageBuffer(ordered=True)
    gpu_ce_cntrl.reqToDirectory.master = ruby_system.network.slave

    gpu_ce_cntrl.mandatoryQueue = MessageBuffer()

    ruby_system.ce_cntrl = gpu_ce_cntrl

    all_sequencers.append(gpu_ce_seq)

    # To limit the copy engine's bandwidth, we add it to a limited bandwidth
    # cluster. Approximate settings are as follows (assuming 2GHz Ruby clock):
    #   PCIe v1.x x16 effective bandwidth ~= 4GB/s: intBW = 3, extBW = 3
    #   PCIe v2.x x16 effective bandwidth ~= 8GB/s: intBW = 5, extBW = 5
    #   PCIe v3.x x16 effective bandwidth ~= 16GB/s: intBW = 10, extBW = 10
    #   PCIe v4.x x16 effective bandwidth ~= 32GB/s: intBW = 21, extBW = 21
    # NOTE: Bandwidth may bottleneck at other parts of the memory hierarchy,
    # so bandwidth considerations should be made in other parts of the memory
    # hierarchy also.
    gpu_ce_cluster = Cluster(intBW=10, extBW=10)
    gpu_ce_cluster.add(gpu_ce_cntrl)

    #
    # Z cache
    #
    z_cache = L1Cache(size=options.sc_zl1_size,
                      assoc=options.sc_zl1_assoc,
                      replacement_policy=LRUReplacementPolicy(),
                      start_index_bit=block_size_bits,
                      dataArrayBanks=8,
                      tagArrayBanks=8,
                      dataAccessLatency=1,
                      tagAccessLatency=1,
                      resourceStalls=False)

    z_cntrl = GPUL1Cache_Controller(version=options.num_sc*2,
                                    cache=z_cache,
                                    l2_select_num_bits=l2_bits,
                                    num_l2=options.gpu_num_l2caches,
                                    issue_latency=l1_to_l2_noc_latency,
                                    number_of_TBEs=options.gpu_zl1_buf_depth,
                                    ruby_system=ruby_system)

    z_seq = RubySequencer(
        version=options.num_cpus + options.num_sc*2 + 2,
        icache=z_cache,
        dcache=z_cache,
        max_outstanding_requests=options.gpu_zl1_buf_depth,
        ruby_system=ruby_system,
        deadlock_threshold=2000000,
        connect_to_io=False)

    z_cntrl.sequencer = z_seq
    ruby_system.l1z_cntrl = z_cntrl
    all_sequencers.append(z_seq)
    gpu_cluster.add(z_cntrl)

    z_cntrl.requestFromL1Cache = MessageBuffer(ordered=True)
    z_cntrl.requestFromL1Cache.master = ruby_system.network.slave
    z_cntrl.responseToL1Cache = MessageBuffer(ordered=True)
    z_cntrl.responseToL1Cache.slave = ruby_system.network.master
    z_cntrl.mandatoryQueue = MessageBuffer()

    #
    # Accelerator datapath caches (only when an accelerator config is given)
    #
    acl_cntrls = []
    if options.accel_cfg_file:
        for idx, datapath in enumerate(system.datapaths):
            acl_cache = L1Cache(size=str(datapath.cacheSize),
                                assoc=datapath.cacheAssoc,
                                replacement_policy=LRUReplacementPolicy(),
                                start_index_bit=block_size_bits,
                                dataAccessLatency=datapath.cacheHitLatency)

            acli_cache = L1Cache(size="512B",
                                 assoc=2,
                                 replacement_policy=LRUReplacementPolicy(),
                                 start_index_bit=block_size_bits,
                                 dataAccessLatency=datapath.cacheHitLatency)

            # l2 cache to satisfy ruby
            l2_cache = L2Cache(size="512B",
                               assoc=2,
                               start_index_bit=block_size_bits)

            assert (not options.is_perfect_cache)  # TODO: handle this option

            acl_cntrl = L1Cache_Controller(
                version=options.num_cpus + idx + 1,
                L1Dcache=acl_cache,
                L1Icache=acli_cache,  # never used
                L2cache=l2_cache,
                no_mig_atomic=not options.allow_atomic_migration,
                send_evictions=send_evicts(options),
                transitions_per_cycle=options.ports,
                ruby_system=ruby_system)

            acl_seq = RubySequencer(
                version=options.num_cpus + options.num_sc*2 + 3 + idx,
                icache=acl_cache,
                dcache=acl_cache,
                ruby_system=ruby_system,
                deadlock_threshold=2000000)

            # Connect the L1 controller and the network
            # Connect the buffers from the controller to network
            acl_cntrl.requestFromCache = MessageBuffer()
            acl_cntrl.requestFromCache.master = ruby_system.network.slave
            acl_cntrl.responseFromCache = MessageBuffer()
            acl_cntrl.responseFromCache.master = ruby_system.network.slave
            acl_cntrl.unblockFromCache = MessageBuffer()
            acl_cntrl.unblockFromCache.master = ruby_system.network.slave

            # Connect the buffers from the network to the controller
            acl_cntrl.forwardToCache = MessageBuffer()
            acl_cntrl.forwardToCache.slave = ruby_system.network.master
            acl_cntrl.responseToCache = MessageBuffer()
            acl_cntrl.responseToCache.slave = ruby_system.network.master

            acl_cntrl.mandatoryQueue = MessageBuffer()
            acl_cntrl.triggerQueue = MessageBuffer()

            acl_cntrl.sequencer = acl_seq
            setattr(ruby_system, "acl_cntrl%02d" % idx, acl_cntrl)
            all_sequencers.append(acl_seq)
            acl_cntrls.append(acl_cntrl)

    # Assemble the top-level cluster from every sub-cluster and controller.
    complete_cluster = Cluster(intBW=32, extBW=32)
    complete_cluster.add(gpu_ce_cluster)
    complete_cluster.add(cpu_cluster)
    complete_cluster.add(gpu_cluster)

    for cntrl in dir_cntrls:
        complete_cluster.add(cntrl)

    for cntrl in dma_cntrls:
        complete_cluster.add(cntrl)

    for cntrl in acl_cntrls:
        complete_cluster.add(cntrl)

    for cluster in l2_clusters:
        complete_cluster.add(cluster)

    return (all_sequencers, dir_cntrls, complete_cluster)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Instantiate the MESI_Two_Level Ruby controllers and their topology.

    Creates one L1 controller and sequencer per CPU, one L2 controller per
    L2 bank, one directory controller per directory, one DMA controller per
    entry of ``dma_ports`` and, when ``full_system`` is set, an additional
    IO DMA controller.  Every controller is registered as a named child of
    ``ruby_system`` and its message buffers are wired to
    ``ruby_system.network``.

    Returns:
        A tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``.
    """
    if buildEnv["PROTOCOL"] != "MESI_Two_Level":
        fatal("This script requires the MESI_Two_Level protocol to be built.")

    net = ruby_system.network

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    l2_bits = int(math.log(options.num_l2caches, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for cpu_idx in xrange(options.num_cpus):
        # Per-CPU private instruction and data caches.
        icache = L1Cache(size=options.l1i_size,
                         assoc=options.l1i_assoc,
                         start_index_bit=block_size_bits,
                         is_icache=True)
        dcache = L1Cache(size=options.l1d_size,
                         assoc=options.l1d_assoc,
                         start_index_bit=block_size_bits,
                         is_icache=False)

        prefetcher = RubyPrefetcher.Prefetcher()

        # The ruby random tester reuses num_cpus to specify the number of cpu
        # ports connected to the single tester object stored in system.cpu,
        # so num_cpus may exceed len(system.cpu).  With exactly one entry we
        # always take its clock domain to avoid indexing off the end.
        core = system.cpu[0] if len(system.cpu) == 1 else system.cpu[cpu_idx]
        clk_domain = core.clk_domain

        l1_cntrl = L1Cache_Controller(version=cpu_idx,
                                      L1Icache=icache,
                                      L1Dcache=dcache,
                                      l2_select_num_bits=l2_bits,
                                      send_evictions=send_evicts(options),
                                      prefetcher=prefetcher,
                                      ruby_system=ruby_system,
                                      clk_domain=clk_domain,
                                      transitions_per_cycle=options.ports,
                                      enable_prefetch=False)

        cpu_seq = RubySequencer(version=cpu_idx,
                                icache=icache,
                                dcache=dcache,
                                clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        setattr(ruby_system, "l1_cntrl%d" % cpu_idx, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network (same order as the
        # buffers are declared: outbound first, then inbound).
        l1_cntrl.mandatoryQueue = MessageBuffer()
        for buf_name in ("requestFromL1Cache",
                         "responseFromL1Cache",
                         "unblockFromL1Cache"):
            setattr(l1_cntrl, buf_name, MessageBuffer())
            getattr(l1_cntrl, buf_name).master = net.slave

        l1_cntrl.optionalQueue = MessageBuffer()
        for buf_name in ("requestToL1Cache", "responseToL1Cache"):
            setattr(l1_cntrl, buf_name, MessageBuffer())
            getattr(l1_cntrl, buf_name).slave = net.master

    l2_index_start = block_size_bits + l2_bits

    for bank_idx in xrange(options.num_l2caches):
        # Shared L2 bank and its controller.
        bank_cache = L2Cache(size=options.l2_size,
                             assoc=options.l2_assoc,
                             start_index_bit=l2_index_start)

        l2_cntrl = L2Cache_Controller(version=bank_idx,
                                      L2cache=bank_cache,
                                      transitions_per_cycle=options.ports,
                                      ruby_system=ruby_system)

        setattr(ruby_system, "l2_cntrl%d" % bank_idx, l2_cntrl)
        l2_cntrl_nodes.append(l2_cntrl)

        # Connect the L2 controllers and the network
        for buf_name in ("DirRequestFromL2Cache",
                         "L1RequestFromL2Cache",
                         "responseFromL2Cache"):
            setattr(l2_cntrl, buf_name, MessageBuffer())
            getattr(l2_cntrl, buf_name).master = net.slave

        for buf_name in ("unblockToL2Cache",
                         "L1RequestToL2Cache",
                         "responseToL2Cache"):
            setattr(l2_cntrl, buf_name, MessageBuffer())
            getattr(l2_cntrl, buf_name).slave = net.master

    # Physical memory is split evenly across the directories.
    phys_mem_size = sum(rng.size() for rng in system.mem_ranges)
    assert phys_mem_size % options.num_dirs == 0
    mem_module_size = phys_mem_size / options.num_dirs

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    for dir_idx in xrange(options.num_dirs):
        dir_size = MemorySize("0B")
        dir_size.value = mem_module_size

        dir_cntrl = Directory_Controller(
            version=dir_idx,
            directory=RubyDirectoryMemory(version=dir_idx, size=dir_size),
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        setattr(ruby_system, "dir_cntrl%d" % dir_idx, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controllers and the network
        for buf_name in ("requestToDir", "responseToDir"):
            setattr(dir_cntrl, buf_name, MessageBuffer())
            getattr(dir_cntrl, buf_name).slave = net.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = net.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for dma_idx, dma_port in enumerate(dma_ports):
        # Create the Ruby objects associated with the dma controller
        dma_seq = DMASequencer(version=dma_idx,
                               ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=dma_idx,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % dma_idx, dma_cntrl)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = net.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = net.slave

    all_cntrls = (l1_cntrl_nodes + l2_cntrl_nodes +
                  dir_cntrl_nodes + dma_cntrl_nodes)

    # Create the io controller and the sequencer
    if full_system:
        io_version = len(dma_ports)
        io_seq = DMASequencer(version=io_version, ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=io_version,
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = net.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = net.slave

        all_cntrls.append(io_controller)

    net.number_of_virtual_networks = 3
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Instantiate the MESI_Three_Level Ruby controllers and their topology.

    CPUs and L3 banks (held in ``L2Cache_Controller`` objects) are divided
    evenly across ``options.num_clusters`` clusters.  For each CPU an L0
    controller (I, D and Z caches), a sequencer and a private L1 controller
    are created; for each cluster its share of the shared banks is created.
    Directory and DMA controllers follow, plus an IO DMA controller when
    ``full_system`` is set.  Every controller is registered as a named child
    of ``ruby_system``.

    Args:
        options: parsed command-line options; ``num_cpus`` and
            ``num_l3caches`` must both be multiples of ``num_clusters``.
        full_system: when true, an extra IO DMA controller is created.
        system: the simulated system; ``system.cpu`` supplies clock domains
            and ``system.mem_ranges`` the physical memory size.
        dma_ports: ports to attach to one DMA sequencer each.
        ruby_system: the Ruby system the controllers are attached to.

    Returns:
        A tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MESI_Three_Level':
        fatal(
            "This script requires the MESI_Three_Level protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l0_cntrl_nodes = []
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    # Floor division keeps the per-cluster counts integral (they feed
    # xrange() and version arithmetic); the asserts guarantee exactness.
    assert (options.num_cpus % options.num_clusters == 0)
    num_cpus_per_cluster = options.num_cpus // options.num_clusters

    assert (options.num_l3caches % options.num_clusters == 0)
    num_l3caches_per_cluster = options.num_l3caches // options.num_clusters

    l3_bits = int(math.log(num_l3caches_per_cluster, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))
    l3_index_start = block_size_bits + l3_bits

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    for i in xrange(options.num_clusters):
        for j in xrange(num_cpus_per_cluster):
            # Global index of this CPU across all clusters; used for every
            # per-CPU version number so they stay unique and aligned.
            cpu_idx = i * num_cpus_per_cluster + j

            # Take the clock domain of the CPU being configured, not of the
            # cluster index.  When system.cpu holds a single object (e.g. a
            # tester standing in for all CPUs) use that one, so the lookup
            # never indexes off the end of the list.
            if len(system.cpu) == 1:
                clk_domain = system.cpu[0].clk_domain
            else:
                clk_domain = system.cpu[cpu_idx].clk_domain

            #
            # First create the Ruby objects associated with this cpu
            #
            l0i_cache = L0Cache(size=options.l1i_size,
                                assoc=options.l1i_assoc,
                                is_icache=True,
                                start_index_bit=block_size_bits,
                                replacement_policy="LRU")

            l0d_cache = L0Cache(size=options.l1d_size,
                                assoc=options.l1d_assoc,
                                is_icache=False,
                                start_index_bit=block_size_bits,
                                replacement_policy="LRU")

            l0z_cache = L0Cache(size=options.l1d_size,
                                assoc=options.l1d_assoc,
                                is_icache=False,
                                start_index_bit=block_size_bits,
                                replacement_policy="LRU")

            l0_cntrl = L0Cache_Controller(version=cpu_idx,
                                          Icache=l0i_cache,
                                          Dcache=l0d_cache,
                                          Zcache=l0z_cache,
                                          send_evictions=send_evicts(options),
                                          clk_domain=clk_domain,
                                          ruby_system=ruby_system)

            # The sequencer version must be the global CPU index: using the
            # cluster index would give every CPU in a cluster the same
            # sequencer version.
            cpu_seq = RubySequencer(version=cpu_idx,
                                    icache=l0i_cache,
                                    clk_domain=clk_domain,
                                    zcache=l0z_cache,
                                    dcache=l0d_cache,
                                    ruby_system=ruby_system)

            l0_cntrl.sequencer = cpu_seq

            l1_cache = L1Cache(size=options.l2_size,
                               assoc=options.l2_assoc,
                               start_index_bit=block_size_bits,
                               is_icache=False,
                               dataAccessLatency=6)

            l1_cntrl = L1Cache_Controller(version=cpu_idx,
                                          cache=l1_cache,
                                          l2_select_num_bits=l3_bits,
                                          cluster_id=i,
                                          ruby_system=ruby_system)

            setattr(ruby_system, "l0_cntrl%d" % cpu_idx, l0_cntrl)
            setattr(ruby_system, "l1_cntrl%d" % cpu_idx, l1_cntrl)

            #
            # Add controllers and sequencers to the appropriate lists
            #
            cpu_sequencers.append(cpu_seq)
            l0_cntrl_nodes.append(l0_cntrl)
            l1_cntrl_nodes.append(l1_cntrl)

            # Connect the L0 and L1 controllers
            l0_cntrl.bufferToL1 = l1_cntrl.bufferFromL0
            l0_cntrl.bufferFromL1 = l1_cntrl.bufferToL0

            # Connect the L1 controllers and the network
            l1_cntrl.requestToL2 = ruby_system.network.slave
            l1_cntrl.responseToL2 = ruby_system.network.slave
            l1_cntrl.unblockToL2 = ruby_system.network.slave

            l1_cntrl.requestFromL2 = ruby_system.network.master
            l1_cntrl.responseFromL2 = ruby_system.network.master

        for j in xrange(num_l3caches_per_cluster):
            # Global index of this shared bank across all clusters.
            bank_idx = i * num_l3caches_per_cluster + j

            l2_cache = L2Cache(size=options.l3_size,
                               assoc=options.l3_assoc,
                               start_index_bit=l3_index_start,
                               dataAccessLatency=14)

            l2_cntrl = L2Cache_Controller(
                version=bank_idx,
                L2cache=l2_cache,
                cluster_id=i,
                transitions_per_cycle=options.ports,
                ruby_system=ruby_system)

            setattr(ruby_system, "l2_cntrl%d" % bank_idx, l2_cntrl)
            l2_cntrl_nodes.append(l2_cntrl)

            # Connect the L2 controllers and the network
            l2_cntrl.DirRequestFromL2Cache = ruby_system.network.slave
            l2_cntrl.L1RequestFromL2Cache = ruby_system.network.slave
            l2_cntrl.responseFromL2Cache = ruby_system.network.slave

            l2_cntrl.unblockToL2Cache = ruby_system.network.master
            l2_cntrl.L1RequestToL2Cache = ruby_system.network.master
            l2_cntrl.responseToL2Cache = ruby_system.network.master

    # Physical memory is split evenly across the directories.
    phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges))
    assert (phys_mem_size % options.num_dirs == 0)
    mem_module_size = phys_mem_size // options.num_dirs

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    for i in xrange(options.num_dirs):
        #
        # Create the Ruby objects associated with the directory controller
        #
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size

        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(version=i, size=dir_size),
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = ruby_system.network.master
        dir_cntrl.responseToDir = ruby_system.network.master
        dir_cntrl.responseFromDir = ruby_system.network.slave

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        # Register the controller first, then attach the device port to its
        # sequencer (same order as the original statements).
        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl.dma_sequencer.slave = dma_port
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = ruby_system.network.master
        dma_cntrl.requestToDir = ruby_system.network.slave

    all_cntrls = l0_cntrl_nodes + \
                 l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.responseFromDir = ruby_system.network.master
        io_controller.requestToDir = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]

    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Build the Ruby controllers for the MOESI_CMP_directory protocol.

    Creates one L1 cache controller and sequencer per CPU, one L2 cache
    controller per L2 bank, one directory controller per directory and one
    DMA controller per DMA port.  Every controller is attached to
    ``ruby_system`` as a named child and its message buffers are connected
    to ``ruby_system.network``.

    Args:
        options: parsed options; the attributes read directly here are
            num_cpus, num_l2caches, num_dirs, cacheline_size, l1i_size,
            l1i_assoc, l1d_size, l1d_assoc, l2_size, l2_assoc and ports.
            The object is also handed to send_evicts() and
            create_topology().
        full_system: when true, one additional DMA controller is created
            and stored as ``ruby_system.io_controller``.
        system: supplies ``system.cpu`` (clock domains) and
            ``system.mem_ranges`` (physical memory size).
        dma_ports: ports to be served by the per-port DMA sequencers.
        ruby_system: object that receives the controllers.

    Returns:
        The tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MOESI_CMP_directory':
        panic("This script requires the MOESI_CMP_directory protocol to be "
              "built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    l2_bits = int(math.log(options.num_l2caches, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in xrange(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=False)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(version=i,
                                      L1Icache=l1i_cache,
                                      L1Dcache=l1d_cache,
                                      l2_select_num_bits=l2_bits,
                                      send_evictions=send_evicts(options),
                                      transitions_per_cycle=options.ports,
                                      clk_domain=clk_domain,
                                      ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        # Attach as ruby_system.l1_cntrl<i>; setattr performs the same
        # attribute assignment the exec'd source string used to.
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.requestFromL1Cache = MessageBuffer()
        l1_cntrl.requestFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.responseFromL1Cache = MessageBuffer()
        l1_cntrl.responseFromL1Cache.master = ruby_system.network.slave

        l1_cntrl.requestToL1Cache = MessageBuffer()
        l1_cntrl.requestToL1Cache.slave = ruby_system.network.master
        l1_cntrl.responseToL1Cache = MessageBuffer()
        l1_cntrl.responseToL1Cache.slave = ruby_system.network.master
        l1_cntrl.triggerQueue = MessageBuffer(ordered=True)

    l2_index_start = block_size_bits + l2_bits

    for i in xrange(options.num_l2caches):
        #
        # First create the Ruby objects associated with this L2 bank
        #
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=l2_index_start)

        l2_cntrl = L2Cache_Controller(version=i,
                                      L2cache=l2_cache,
                                      transitions_per_cycle=options.ports,
                                      ruby_system=ruby_system)

        setattr(ruby_system, "l2_cntrl%d" % i, l2_cntrl)
        l2_cntrl_nodes.append(l2_cntrl)

        # Connect the L2 controllers and the network
        l2_cntrl.GlobalRequestFromL2Cache = MessageBuffer()
        l2_cntrl.GlobalRequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
        l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.responseFromL2Cache = MessageBuffer()
        l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

        l2_cntrl.GlobalRequestToL2Cache = MessageBuffer()
        l2_cntrl.GlobalRequestToL2Cache.slave = ruby_system.network.master
        l2_cntrl.L1RequestToL2Cache = MessageBuffer()
        l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
        l2_cntrl.responseToL2Cache = MessageBuffer()
        l2_cntrl.responseToL2Cache.slave = ruby_system.network.master
        l2_cntrl.triggerQueue = MessageBuffer(ordered=True)

    phys_mem_size = sum(r.size() for r in system.mem_ranges)
    assert(phys_mem_size % options.num_dirs == 0)
    # Floor division: the assert above guarantees an exact split, and the
    # result must stay an integer byte count.
    mem_module_size = phys_mem_size // options.num_dirs

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system.
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    for i in xrange(options.num_dirs):
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size

        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(version=i, size=dir_size),
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer()
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.reqToDir = MessageBuffer()
        dma_cntrl.reqToDir.master = ruby_system.network.slave
        dma_cntrl.respToDir = MessageBuffer()
        dma_cntrl.respToDir.master = ruby_system.network.slave
        dma_cntrl.triggerQueue = MessageBuffer(ordered=True)

    all_cntrls = l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer()
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.reqToDir = MessageBuffer()
        io_controller.reqToDir.master = ruby_system.network.slave
        io_controller.respToDir = MessageBuffer()
        io_controller.respToDir.master = ruby_system.network.slave
        io_controller.triggerQueue = MessageBuffer(ordered=True)

        all_cntrls = all_cntrls + [io_controller]

    ruby_system.network.number_of_virtual_networks = 3
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Build the Ruby controllers for the MESI_Two_Level protocol.

    Creates one L1 cache controller (with prefetcher object) and sequencer
    per CPU, one L2 cache controller per L2 bank, one directory controller
    per directory and one DMA controller per DMA port.  Every controller is
    attached to ``ruby_system`` as a named child and its message buffers
    are connected to ``ruby_system.network``.

    Args:
        options: parsed options; the attributes read directly here are
            num_cpus, num_l2caches, num_dirs, cacheline_size, l1i_size,
            l1i_assoc, l1d_size, l1d_assoc, l2_size, l2_assoc, ports,
            l2_replacement_policy and, for the 'SP_static' policy,
            min_gpu_partition_size / max_gpu_partition_size.  The object
            is also handed to send_evicts() and create_topology().
        full_system: when true, one additional DMA controller is created
            and stored as ``ruby_system.io_controller``.
        system: supplies ``system.cpu`` (clock domains) and
            ``system.mem_ranges`` (physical memory size).
        dma_ports: ports to be served by the per-port DMA sequencers.
        ruby_system: object that receives the controllers.

    Returns:
        The tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``.

    An unsupported ``options.l2_replacement_policy`` value prints a
    message and exits the process with status -1.
    """
    if buildEnv['PROTOCOL'] != 'MESI_Two_Level':
        fatal("This script requires the MESI_Two_Level protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    l2_bits = int(math.log(options.num_l2caches, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in xrange(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=False)

        prefetcher = RubyPrefetcher.Prefetcher()

        # system.cpu may hold a single object even though num_cpus is
        # larger (the ruby random tester reuses num_cpus for its port
        # count).  In that case take the clock domain from system.cpu[0]
        # so we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(version=i,
                                      requesterId="l1_cntrl" + str(i),
                                      L1Icache=l1i_cache,
                                      L1Dcache=l1d_cache,
                                      l2_select_num_bits=l2_bits,
                                      send_evictions=send_evicts(options),
                                      prefetcher=prefetcher,
                                      ruby_system=ruby_system,
                                      clk_domain=clk_domain,
                                      transitions_per_cycle=options.ports,
                                      enable_prefetch=False)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        # Attach as ruby_system.l1_cntrl<i>; setattr performs the same
        # attribute assignment the exec'd source string used to.
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.requestFromL1Cache = MessageBuffer()
        l1_cntrl.requestFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.responseFromL1Cache = MessageBuffer()
        l1_cntrl.responseFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.unblockFromL1Cache = MessageBuffer()
        l1_cntrl.unblockFromL1Cache.master = ruby_system.network.slave

        l1_cntrl.optionalQueue = MessageBuffer()

        l1_cntrl.requestToL1Cache = MessageBuffer()
        l1_cntrl.requestToL1Cache.slave = ruby_system.network.master
        l1_cntrl.responseToL1Cache = MessageBuffer()
        l1_cntrl.responseToL1Cache.slave = ruby_system.network.master

    l2_index_start = block_size_bits + l2_bits

    for i in xrange(options.num_l2caches):
        #
        # First create the Ruby objects associated with this L2 bank
        #
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=l2_index_start)

        if options.l2_replacement_policy:
            if options.l2_replacement_policy == 'LRU':
                l2_cache.replacement_policy = LRUReplacementPolicy()
            elif options.l2_replacement_policy == 'SP_static':
                l2_cache.replacement_policy = SP_staticReplacementPolicy(
                    min_gpu_partition_size=options.min_gpu_partition_size,
                    max_gpu_partition_size=options.max_gpu_partition_size)
            elif options.l2_replacement_policy == 'Bypass':
                l2_cache.replacement_policy = BypassReplacementPolicy()
            else:
                # Single-argument call form prints the same text under
                # both the Python 2 print statement and print().
                print('L2 replacement policy: ' +
                      options.l2_replacement_policy + ' is not supported.')
                sys.exit(-1)

        l2_cntrl = L2Cache_Controller(version=i,
                                      L2cache=l2_cache,
                                      transitions_per_cycle=options.ports,
                                      ruby_system=ruby_system)

        setattr(ruby_system, "l2_cntrl%d" % i, l2_cntrl)
        l2_cntrl_nodes.append(l2_cntrl)

        # Connect the L2 controllers and the network
        l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
        l2_cntrl.DirRequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
        l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.responseFromL2Cache = MessageBuffer()
        l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

        l2_cntrl.unblockToL2Cache = MessageBuffer()
        l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master
        l2_cntrl.L1RequestToL2Cache = MessageBuffer()
        l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
        l2_cntrl.responseToL2Cache = MessageBuffer()
        l2_cntrl.responseToL2Cache.slave = ruby_system.network.master

    phys_mem_size = sum(r.size() for r in system.mem_ranges)
    assert(phys_mem_size % options.num_dirs == 0)
    # Floor division: the assert above guarantees an exact split, and the
    # result must stay an integer byte count.
    mem_module_size = phys_mem_size // options.num_dirs

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    for i in xrange(options.num_dirs):
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size

        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(version=i, size=dir_size),
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        # Create the Ruby objects associated with the dma controller
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave

    all_cntrls = l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]

    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Build the Ruby controllers for the MOESI_CMP_directory protocol.

    This variant connects controller ports straight to the network ports
    (no explicit MessageBuffer objects).  It creates one L1 cache
    controller and sequencer per CPU, one L2 cache controller per L2 bank,
    one directory controller per directory and one DMA controller per DMA
    port, and attaches each of them to ``ruby_system`` as a named child.

    Args:
        options: parsed options; the attributes read directly here are
            num_cpus, num_l2caches, num_dirs, cacheline_size, l1i_size,
            l1i_assoc, l1d_size, l1d_assoc, l2_size, l2_assoc and ports.
            The object is also handed to send_evicts() and
            create_topology().
        full_system: when true, one additional DMA controller is created
            and stored as ``ruby_system.io_controller``.
        system: supplies ``system.cpu`` (clock domains) and
            ``system.mem_ranges`` (physical memory size).
        dma_ports: ports to be served by the per-port DMA sequencers.
        ruby_system: object that receives the controllers.

    Returns:
        The tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MOESI_CMP_directory':
        panic("This script requires the MOESI_CMP_directory protocol to be "
              "built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    l2_bits = int(math.log(options.num_l2caches, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in xrange(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=False)

        # system.cpu may hold a single object even though num_cpus is
        # larger (the ruby random tester reuses num_cpus for its port
        # count).  In that case take the clock domain from system.cpu[0]
        # so we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(version=i,
                                      L1Icache=l1i_cache,
                                      L1Dcache=l1d_cache,
                                      l2_select_num_bits=l2_bits,
                                      send_evictions=send_evicts(options),
                                      transitions_per_cycle=options.ports,
                                      clk_domain=clk_domain,
                                      ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        # Attach as ruby_system.l1_cntrl<i>; setattr performs the same
        # attribute assignment the exec'd source string used to.
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network
        l1_cntrl.requestFromL1Cache = ruby_system.network.slave
        l1_cntrl.responseFromL1Cache = ruby_system.network.slave
        l1_cntrl.requestToL1Cache = ruby_system.network.master
        l1_cntrl.responseToL1Cache = ruby_system.network.master

    l2_index_start = block_size_bits + l2_bits

    for i in xrange(options.num_l2caches):
        #
        # First create the Ruby objects associated with this L2 bank
        #
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=l2_index_start)

        l2_cntrl = L2Cache_Controller(version=i,
                                      L2cache=l2_cache,
                                      transitions_per_cycle=options.ports,
                                      ruby_system=ruby_system)

        setattr(ruby_system, "l2_cntrl%d" % i, l2_cntrl)
        l2_cntrl_nodes.append(l2_cntrl)

        # Connect the L2 controllers and the network
        l2_cntrl.GlobalRequestFromL2Cache = ruby_system.network.slave
        l2_cntrl.L1RequestFromL2Cache = ruby_system.network.slave
        l2_cntrl.responseFromL2Cache = ruby_system.network.slave
        l2_cntrl.GlobalRequestToL2Cache = ruby_system.network.master
        l2_cntrl.L1RequestToL2Cache = ruby_system.network.master
        l2_cntrl.responseToL2Cache = ruby_system.network.master

    phys_mem_size = sum(r.size() for r in system.mem_ranges)
    assert(phys_mem_size % options.num_dirs == 0)
    # Floor division: the assert above guarantees an exact split, and the
    # result must stay an integer byte count.
    mem_module_size = phys_mem_size // options.num_dirs

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system.
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    for i in xrange(options.num_dirs):
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size

        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(version=i, size=dir_size),
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = ruby_system.network.master
        dir_cntrl.responseToDir = ruby_system.network.master
        dir_cntrl.responseFromDir = ruby_system.network.slave
        dir_cntrl.forwardFromDir = ruby_system.network.slave

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = ruby_system.network.master
        dma_cntrl.reqToDir = ruby_system.network.slave
        dma_cntrl.respToDir = ruby_system.network.slave

    all_cntrls = l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.responseFromDir = ruby_system.network.master
        io_controller.reqToDir = ruby_system.network.slave
        io_controller.respToDir = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]

    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):
    """Build the Ruby controllers for the MESI_Three_Level protocol.

    CPUs and L2 banks are split evenly over ``options.num_clusters``
    clusters.  For every CPU an L0 controller (with sequencer) and an L1
    controller are created and linked by a pair of ordered buffers; for
    every L2 bank an L2 controller is created.  Directory controllers come
    from create_directories(), and one DMA controller is created per DMA
    port.  Controllers are attached to ``ruby_system`` as named children
    and connected to ``ruby_system.network``.

    Args:
        options: parsed options; the attributes read directly here are
            num_cpus, num_clusters, num_l2caches, cacheline_size,
            l1d_size, l1d_assoc, l2_size, l2_assoc and ports.  The object
            is also handed to send_evicts(), create_directories(),
            create_topology() and FileSystemConfig.config_filesystem().
        full_system: when true, an extra DMA controller is created and
            stored as ``ruby_system.io_controller``; when false, the CPU
            and cache layout is registered with FileSystemConfig instead.
        system: supplies ``system.cpu`` (clock domains); also passed to
            create_directories().
        dma_ports: ports to be served by the per-port DMA sequencers.
        bootmem: passed through to create_directories().
        ruby_system: object that receives the controllers.

    Returns:
        The tuple ``(cpu_sequencers, mem_dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MESI_Three_Level':
        fatal("This script requires the MESI_Three_Level protocol to be "
              "built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l0_cntrl_nodes = []
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dma_cntrl_nodes = []

    # Floor division keeps the per-cluster counts integral; the asserts
    # guarantee the split is exact.
    assert (options.num_cpus % options.num_clusters == 0)
    num_cpus_per_cluster = options.num_cpus // options.num_clusters

    assert (options.num_l2caches % options.num_clusters == 0)
    num_l2caches_per_cluster = options.num_l2caches // options.num_clusters

    l2_bits = int(math.log(num_l2caches_per_cluster, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))
    l2_index_start = block_size_bits + l2_bits

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    for i in range(options.num_clusters):
        for j in range(num_cpus_per_cluster):
            # Global CPU index; also used as the controller version.
            cpu_id = i * num_cpus_per_cluster + j

            #
            # First create the Ruby objects associated with this cpu
            #
            l0i_cache = L0Cache(size='4096B',
                                assoc=1,
                                is_icache=True,
                                start_index_bit=block_size_bits,
                                replacement_policy=LRUReplacementPolicy())

            l0d_cache = L0Cache(size='4096B',
                                assoc=1,
                                is_icache=False,
                                start_index_bit=block_size_bits,
                                replacement_policy=LRUReplacementPolicy())

            # the ruby random tester reuses num_cpus to specify the
            # number of cpu ports connected to the tester object, which
            # is stored in system.cpu. because there is only ever one
            # tester object, num_cpus is not necessarily equal to the
            # size of system.cpu; therefore if len(system.cpu) == 1
            # we use system.cpu[0] to set the clk_domain, thereby ensuring
            # we don't index off the end of the cpu list.
            if len(system.cpu) == 1:
                clk_domain = system.cpu[0].clk_domain
            else:
                # Index by the global CPU id, not the cluster index, so
                # each sequencer takes the clock domain of the CPU whose
                # id it carries as its version.
                clk_domain = system.cpu[cpu_id].clk_domain

            l0_cntrl = L0Cache_Controller(
                version=cpu_id,
                Icache=l0i_cache,
                Dcache=l0d_cache,
                send_evictions=send_evicts(options),
                clk_domain=clk_domain,
                ruby_system=ruby_system)

            cpu_seq = RubySequencer(version=cpu_id,
                                    icache=l0i_cache,
                                    clk_domain=clk_domain,
                                    dcache=l0d_cache,
                                    ruby_system=ruby_system)

            l0_cntrl.sequencer = cpu_seq

            l1_cache = L1Cache(size=options.l1d_size,
                               assoc=options.l1d_assoc,
                               start_index_bit=block_size_bits,
                               is_icache=False)

            l1_cntrl = L1Cache_Controller(
                version=cpu_id,
                cache=l1_cache,
                l2_select_num_bits=l2_bits,
                cluster_id=i,
                ruby_system=ruby_system)

            # Attach as ruby_system.l0_cntrl<id> / l1_cntrl<id>; setattr
            # performs the same attribute assignment the exec'd source
            # strings used to.
            setattr(ruby_system, "l0_cntrl%d" % cpu_id, l0_cntrl)
            setattr(ruby_system, "l1_cntrl%d" % cpu_id, l1_cntrl)

            #
            # Add controllers and sequencers to the appropriate lists
            #
            cpu_sequencers.append(cpu_seq)
            l0_cntrl_nodes.append(l0_cntrl)
            l1_cntrl_nodes.append(l1_cntrl)

            # Connect the L0 and L1 controllers
            l0_cntrl.mandatoryQueue = MessageBuffer()
            l0_cntrl.bufferToL1 = MessageBuffer(ordered=True)
            l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1
            l0_cntrl.bufferFromL1 = MessageBuffer(ordered=True)
            l1_cntrl.bufferToL0 = l0_cntrl.bufferFromL1

            # Connect the L1 controllers and the network
            l1_cntrl.requestToL2 = MessageBuffer()
            l1_cntrl.requestToL2.master = ruby_system.network.slave
            l1_cntrl.responseToL2 = MessageBuffer()
            l1_cntrl.responseToL2.master = ruby_system.network.slave
            l1_cntrl.unblockToL2 = MessageBuffer()
            l1_cntrl.unblockToL2.master = ruby_system.network.slave

            l1_cntrl.requestFromL2 = MessageBuffer()
            l1_cntrl.requestFromL2.slave = ruby_system.network.master
            l1_cntrl.responseFromL2 = MessageBuffer()
            l1_cntrl.responseFromL2.slave = ruby_system.network.master

        for j in range(num_l2caches_per_cluster):
            l2_id = i * num_l2caches_per_cluster + j

            l2_cache = L2Cache(size=options.l2_size,
                               assoc=options.l2_assoc,
                               start_index_bit=l2_index_start)

            l2_cntrl = L2Cache_Controller(
                version=l2_id,
                L2cache=l2_cache,
                cluster_id=i,
                transitions_per_cycle=options.ports,
                ruby_system=ruby_system)

            setattr(ruby_system, "l2_cntrl%d" % l2_id, l2_cntrl)
            l2_cntrl_nodes.append(l2_cntrl)

            # Connect the L2 controllers and the network
            l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
            l2_cntrl.DirRequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
            l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.responseFromL2Cache = MessageBuffer()
            l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

            l2_cntrl.unblockToL2Cache = MessageBuffer()
            l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master
            l2_cntrl.L1RequestToL2Cache = MessageBuffer()
            l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
            l2_cntrl.responseToL2Cache = MessageBuffer()
            l2_cntrl.responseToL2Cache.slave = ruby_system.network.master

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    # Work on a copy so the list returned to the caller never includes the
    # ROM directory controller.
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)

    for dir_cntrl in dir_cntrl_nodes:
        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        # Bind the port only after the controller has been attached to
        # ruby_system, as the original exec'd statements did.
        dma_cntrl.dma_sequencer.slave = dma_port
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave

    all_cntrls = l0_cntrl_nodes + \
                 l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]
    else:
        # Register configuration with filesystem
        FileSystemConfig.config_filesystem(options)

        for i in xrange(options.num_clusters):
            for j in xrange(num_cpus_per_cluster):
                cpu_id = i * num_cpus_per_cluster + j

                FileSystemConfig.register_cpu(
                    physical_package_id=0,
                    core_siblings=xrange(options.num_cpus),
                    core_id=cpu_id,
                    thread_siblings=[])

                FileSystemConfig.register_cache(
                    level=0,
                    idu_type='Instruction',
                    size='4096B',
                    line_size=options.cacheline_size,
                    assoc=1,
                    cpus=[cpu_id])
                FileSystemConfig.register_cache(
                    level=0,
                    idu_type='Data',
                    size='4096B',
                    line_size=options.cacheline_size,
                    assoc=1,
                    cpus=[cpu_id])

                FileSystemConfig.register_cache(
                    level=1,
                    idu_type='Unified',
                    size=options.l1d_size,
                    line_size=options.cacheline_size,
                    assoc=options.l1d_assoc,
                    cpus=[cpu_id])

            # One L2 entry per cluster, covering every CPU of the cluster.
            FileSystemConfig.register_cache(
                level=2,
                idu_type='Unified',
                size=str(MemorySize(options.l2_size) *
                         num_l2caches_per_cluster) + 'B',
                line_size=options.cacheline_size,
                assoc=options.l2_assoc,
                cpus=list(xrange(i * num_cpus_per_cluster,
                                 (i + 1) * num_cpus_per_cluster)))

    ruby_system.network.number_of_virtual_networks = 3
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Build MESI_Two_Level controllers plus network-port controllers.

    Besides the usual L1/L2/directory/DMA controllers, this variant creates
    one gem5NetworkPortInterface controller per network port and sizes the
    number of L1 controllers from the accelerator and TD counts instead of
    from ``options.num_cpus``.  Controller ports are connected straight to
    the network ports of ``ruby_system.network``.

    Args:
        options: parsed options; the attributes read directly here are
            num_networkports, accelerators, num_tds, num_cpus,
            num_l2caches, num_dirs, cacheline_size, l1i_size, l1i_assoc,
            l1d_size, l1d_assoc, l2_size, l2_assoc and ports.  The object
            is also handed to send_evicts() and create_topology().
        full_system: when true, one additional DMA controller is created
            and stored as ``ruby_system.io_controller``.
        system: supplies ``system.cpu[0].clk_domain`` and
            ``system.mem_ranges``.
        dma_ports: ports to be served by the per-port DMA sequencers.
        ruby_system: object that receives the controllers.

    Returns:
        The tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``;
        ``cpu_sequencers`` holds at most ``options.num_cpus`` sequencers.
    """
    if buildEnv['PROTOCOL'] != 'MESI_Two_Level':
        fatal("This script requires the MESI_Two_Level protocol to be built.")

    ruby_system.num_simics_net_ports = options.num_networkports
    ruby_system.num_accelerators = options.accelerators
    ruby_system.num_TDs = options.num_tds

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    netport_cntrl_nodes = []
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    l2_bits = int(math.log(options.num_l2caches, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))

    assert(options.num_networkports == options.num_l2caches)
    # Round (accelerators + TDs) up to the next multiple of the network
    # port count.  Floor division makes the integer ceiling computation
    # explicit instead of relying on "/" truncating.
    num_l1_cntrls = ((options.accelerators + options.num_tds +
                      options.num_networkports - 1) //
                     options.num_networkports) * options.num_networkports
    # Single-argument call form prints the same text under both the
    # Python 2 print statement and print().
    print("num_l1_cntrls = %d" % num_l1_cntrls)
    assert(num_l1_cntrls >= (options.accelerators + options.num_tds))

    for i in xrange(options.num_networkports):
        # First create the Ruby objects associated with
        # the CPU and Accelerator signal communication
        netport_cntrl = gem5NetworkPortInterface_Controller(
            version=i,
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        # Attach as ruby_system.netport_cntrl<i>; setattr performs the
        # same attribute assignment the exec'd source string used to.
        setattr(ruby_system, "netport_cntrl%d" % i, netport_cntrl)
        netport_cntrl_nodes.append(netport_cntrl)

        # Connect the netport controller to the network
        netport_cntrl.messageOut = ruby_system.network.slave
        netport_cntrl.messageIn = ruby_system.network.master

    for i in xrange(num_l1_cntrls):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=False)

        prefetcher = RubyPrefetcher.Prefetcher()

        # Every L1 controller and sequencer uses the clock domain of
        # system.cpu[0].
        l1_cntrl = L1Cache_Controller(
            version=i,
            L1Icache=l1i_cache,
            L1Dcache=l1d_cache,
            l2_select_num_bits=l2_bits,
            l2_select_low_bit=block_size_bits,
            send_evictions=send_evicts(options),
            prefetcher=prefetcher,
            ruby_system=ruby_system,
            clk_domain=system.cpu[0].clk_domain,
            transitions_per_cycle=options.ports,
            enable_prefetch=False)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                clk_domain=system.cpu[0].clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists.  Only
        # the first num_cpus sequencers are handed back to the caller;
        # every L1 controller is still a network node.
        if len(cpu_sequencers) < options.num_cpus:
            cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network
        l1_cntrl.requestFromL1Cache = ruby_system.network.slave
        l1_cntrl.responseFromL1Cache = ruby_system.network.slave
        l1_cntrl.unblockFromL1Cache = ruby_system.network.slave

        l1_cntrl.requestToL1Cache = ruby_system.network.master
        l1_cntrl.responseToL1Cache = ruby_system.network.master

    l2_index_start = block_size_bits + l2_bits

    for i in xrange(options.num_l2caches):
        #
        # First create the Ruby objects associated with this L2 bank
        #
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=l2_index_start)

        l2_cntrl = L2Cache_Controller(version=i,
                                      L2cache=l2_cache,
                                      transitions_per_cycle=options.ports,
                                      ruby_system=ruby_system)

        setattr(ruby_system, "l2_cntrl%d" % i, l2_cntrl)
        l2_cntrl_nodes.append(l2_cntrl)

        # Connect the L2 controllers and the network
        l2_cntrl.DirRequestFromL2Cache = ruby_system.network.slave
        l2_cntrl.L1RequestFromL2Cache = ruby_system.network.slave
        l2_cntrl.responseFromL2Cache = ruby_system.network.slave

        l2_cntrl.unblockToL2Cache = ruby_system.network.master
        l2_cntrl.L1RequestToL2Cache = ruby_system.network.master
        l2_cntrl.responseToL2Cache = ruby_system.network.master

    phys_mem_size = sum(r.size() for r in system.mem_ranges)
    assert(phys_mem_size % options.num_dirs == 0)
    # Floor division: the assert above guarantees an exact split, and the
    # result must stay an integer byte count.
    mem_module_size = phys_mem_size // options.num_dirs

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    for i in xrange(options.num_dirs):
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size

        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(version=i, size=dir_size),
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = ruby_system.network.master
        dir_cntrl.responseToDir = ruby_system.network.master
        dir_cntrl.responseFromDir = ruby_system.network.slave

    for i, dma_port in enumerate(dma_ports):
        # Create the Ruby objects associated with the dma controller
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = ruby_system.network.master
        dma_cntrl.requestToDir = ruby_system.network.slave

    all_cntrls = netport_cntrl_nodes + \
                 l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports),
                              ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.responseFromDir = ruby_system.network.master
        io_controller.requestToDir = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]

    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):
    """Build the Ruby controllers for the MOESI_CMP_token protocol.

    Creates one L1 controller (with its sequencer) per cpu, one L2
    controller per L2 cache, the directory controllers returned by
    create_directories(), and one DMA controller per entry in dma_ports,
    wiring each controller's message buffers to ruby_system.network.

    Args:
        options: parsed options; this function reads num_cpus,
            num_l2caches, cacheline_size, the l1i/l1d/l2 size and assoc
            values, l1_retries, timeout_latency, disable_dyn_timeouts,
            allow_atomic_migration and ports.
        full_system: when true, an extra DMA controller/sequencer pair is
            created and stored as ruby_system.io_controller /
            ruby_system._io_port.
        system: object whose ``cpu`` list supplies the clock domains; it is
            also forwarded to create_directories().
        dma_ports: iterable of ports, one DMA controller is made for each.
        bootmem: forwarded unchanged to create_directories().
        ruby_system: the Ruby system the controllers are attached to.

    Returns:
        Tuple ``(cpu_sequencers, mem_dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MOESI_CMP_token':
        panic("This script requires the MOESI_CMP_token protocol to be built.")

    #
    # number of tokens that the owner passes to requests so that shared
    # blocks can respond to read requests
    #
    n_tokens = options.num_cpus + 1

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    l2_bits = int(math.log(options.num_l2caches, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(
            version=i,
            L1Icache=l1i_cache,
            L1Dcache=l1d_cache,
            l2_select_num_bits=l2_bits,
            N_tokens=n_tokens,
            retry_threshold=options.l1_retries,
            fixed_timeout_latency=options.timeout_latency,
            dynamic_timeout_enabled=not options.disable_dyn_timeouts,
            no_mig_atomic=not options.allow_atomic_migration,
            send_evictions=send_evicts(options),
            transitions_per_cycle=options.ports,
            clk_domain=clk_domain,
            ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i, icache=l1i_cache,
                                dcache=l1d_cache, clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        # Attach under a per-index attribute name (l1_cntrl0, l1_cntrl1, ...)
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network
        l1_cntrl.requestFromL1Cache = MessageBuffer()
        l1_cntrl.requestFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.responseFromL1Cache = MessageBuffer()
        l1_cntrl.responseFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.persistentFromL1Cache = MessageBuffer(ordered=True)
        l1_cntrl.persistentFromL1Cache.master = ruby_system.network.slave

        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.requestToL1Cache = MessageBuffer()
        l1_cntrl.requestToL1Cache.slave = ruby_system.network.master
        l1_cntrl.responseToL1Cache = MessageBuffer()
        l1_cntrl.responseToL1Cache.slave = ruby_system.network.master
        l1_cntrl.persistentToL1Cache = MessageBuffer(ordered=True)
        l1_cntrl.persistentToL1Cache.slave = ruby_system.network.master

    l2_index_start = block_size_bits + l2_bits

    for i in range(options.num_l2caches):
        #
        # First create the Ruby objects associated with this L2 cache
        #
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=l2_index_start)

        l2_cntrl = L2Cache_Controller(version=i,
                                      L2cache=l2_cache,
                                      N_tokens=n_tokens,
                                      transitions_per_cycle=options.ports,
                                      ruby_system=ruby_system)

        setattr(ruby_system, "l2_cntrl%d" % i, l2_cntrl)
        l2_cntrl_nodes.append(l2_cntrl)

        # Connect the L2 controllers and the network
        l2_cntrl.GlobalRequestFromL2Cache = MessageBuffer()
        l2_cntrl.GlobalRequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
        l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.responseFromL2Cache = MessageBuffer()
        l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

        l2_cntrl.GlobalRequestToL2Cache = MessageBuffer()
        l2_cntrl.GlobalRequestToL2Cache.slave = ruby_system.network.master
        l2_cntrl.L1RequestToL2Cache = MessageBuffer()
        l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
        l2_cntrl.responseToL2Cache = MessageBuffer()
        l2_cntrl.responseToL2Cache.slave = ruby_system.network.master
        l2_cntrl.persistentToL2Cache = MessageBuffer(ordered=True)
        l2_cntrl.persistentToL2Cache.slave = ruby_system.network.master

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    # Work on a copy so the list returned to the caller holds only the
    # memory directories, while the optional ROM directory is still wired
    # up and included in the topology.
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)

    for dir_cntrl in dir_cntrl_nodes:
        dir_cntrl.l2_select_num_bits = l2_bits

        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.persistentToDir = MessageBuffer(ordered=True)
        dir_cntrl.persistentToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master

        dir_cntrl.requestFromDir = MessageBuffer()
        dir_cntrl.requestFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.persistentFromDir = MessageBuffer(ordered=True)
        dir_cntrl.persistentFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.reqToDirectory = MessageBuffer()
        dma_cntrl.reqToDirectory.master = ruby_system.network.slave

    all_cntrls = (l1_cntrl_nodes +
                  l2_cntrl_nodes +
                  dir_cntrl_nodes +
                  dma_cntrl_nodes)

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the io controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.reqToDirectory = MessageBuffer()
        io_controller.reqToDirectory.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]

    ruby_system.network.number_of_virtual_networks = 6
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Build the Ruby controllers for the MOESI_hammer protocol.

    Creates one L1 controller per cpu (each with private L1I, L1D and L2
    cache objects and a sequencer), ``options.num_dirs`` directory
    controllers each with a probe filter, and one DMA controller per entry
    in dma_ports, wiring each controller's message buffers to
    ruby_system.network.

    Args:
        options: parsed options; this function reads num_cpus, num_dirs,
            cacheline_size, the l1i/l1d/l2 size and assoc values,
            allow_atomic_migration, recycle_latency, numa_high_bit, pf_on,
            dir_on and ports.
        full_system: when true, an extra DMA controller/sequencer pair is
            created and stored as ruby_system.io_controller /
            ruby_system._io_port.
        system: object whose ``cpu`` list supplies the clock domains and
            whose ``mem_ranges`` determine the physical memory size.
        dma_ports: iterable of ports, one DMA controller is made for each.
        ruby_system: the Ruby system the controllers are attached to.

    Returns:
        Tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MOESI_hammer':
        panic("This script requires the MOESI_hammer protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    dir_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits)
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=block_size_bits)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(
            version=i,
            L1Icache=l1i_cache,
            L1Dcache=l1d_cache,
            L2cache=l2_cache,
            no_mig_atomic=not options.allow_atomic_migration,
            send_evictions=send_evicts(options),
            transitions_per_cycle=options.ports,
            clk_domain=clk_domain,
            ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i, icache=l1i_cache,
                                dcache=l1d_cache, clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        if options.recycle_latency:
            l1_cntrl.recycle_latency = options.recycle_latency

        # Attach under a per-index attribute name (l1_cntrl0, l1_cntrl1, ...)
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controller and the network
        # Connect the buffers from the controller to network
        l1_cntrl.requestFromCache = MessageBuffer()
        l1_cntrl.requestFromCache.master = ruby_system.network.slave
        l1_cntrl.responseFromCache = MessageBuffer()
        l1_cntrl.responseFromCache.master = ruby_system.network.slave
        l1_cntrl.unblockFromCache = MessageBuffer()
        l1_cntrl.unblockFromCache.master = ruby_system.network.slave

        l1_cntrl.triggerQueue = MessageBuffer()

        # Connect the buffers from the network to the controller
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.forwardToCache = MessageBuffer()
        l1_cntrl.forwardToCache.slave = ruby_system.network.master
        l1_cntrl.responseToCache = MessageBuffer()
        l1_cntrl.responseToCache.slave = ruby_system.network.master

    phys_mem_size = sum(r.size() for r in system.mem_ranges)
    assert(phys_mem_size % options.num_dirs == 0)
    # Floor division keeps the per-directory size an integer regardless of
    # the interpreter's division semantics; the assert above guarantees
    # the division is exact.
    mem_module_size = phys_mem_size // options.num_dirs

    #
    # determine size and index bits for probe filter
    # By default, the probe filter size is configured to be twice the
    # size of the L2 cache.
    #
    pf_size = MemorySize(options.l2_size)
    pf_size.value = pf_size.value * 2
    dir_bits = int(math.log(options.num_dirs, 2))
    pf_bits = int(math.log(pf_size.value, 2))
    if options.numa_high_bit:
        if options.pf_on or options.dir_on:
            # if numa high bit explicitly set, make sure it does not overlap
            # with the probe filter index
            assert(options.numa_high_bit - dir_bits > pf_bits)

        # set the probe filter start bit to just above the block offset
        pf_start_bit = block_size_bits
    else:
        if dir_bits > 0:
            pf_start_bit = dir_bits + block_size_bits - 1
        else:
            pf_start_bit = block_size_bits

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    for i in range(options.num_dirs):
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size

        pf = ProbeFilter(size=pf_size, assoc=4,
                         start_index_bit=pf_start_bit)

        dir_cntrl = Directory_Controller(
            version=i,
            directory=RubyDirectoryMemory(version=i, size=dir_size),
            probeFilter=pf,
            probe_filter_enabled=options.pf_on,
            full_bit_dir_enabled=options.dir_on,
            transitions_per_cycle=options.ports,
            ruby_system=ruby_system)

        if options.recycle_latency:
            dir_cntrl.recycle_latency = options.recycle_latency

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        # Connect the directory controller to the network
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)

        dir_cntrl.unblockToDir = MessageBuffer()
        dir_cntrl.unblockToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl_nodes.append(dma_cntrl)

        if options.recycle_latency:
            dma_cntrl.recycle_latency = options.recycle_latency

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.mandatoryQueue = MessageBuffer()

    all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the io controller to the network
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave
        io_controller.mandatoryQueue = MessageBuffer()

        all_cntrls = all_cntrls + [io_controller]

    ruby_system.network.number_of_virtual_networks = 6
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Build the Ruby controllers for the MESI_Two_Level protocol.

    Creates the L2 controllers first, then one L1 controller (with its
    prefetcher and sequencer) per cpu, the directory controllers returned
    by create_directories(), and one DMA controller per entry in
    dma_ports, wiring each controller's message buffers to
    ruby_system.network.

    Args:
        options: parsed options; this function reads num_cpus,
            num_l2caches, cacheline_size, the l1i/l1d/l2 size and assoc
            values, rand_L2, mem_size, random_repl_L1, LRU_repl_L1,
            l1d_mshr_tracking, enable_prefetch, L2Inv, l1d_latency and
            ports.
        full_system: when true, an extra DMA controller/sequencer pair is
            created and stored as ruby_system.io_controller /
            ruby_system._io_port.
        system: object whose ``cpu`` list supplies the clock domains and
            whose ``mem_ranges`` are passed to create_directories().
        dma_ports: iterable of ports, one DMA controller is made for each.
        ruby_system: the Ruby system the controllers are attached to.

    Returns:
        Tuple ``(cpu_sequencers, dir_cntrl_nodes, topology)``.
    """
    if buildEnv['PROTOCOL'] != 'MESI_Two_Level':
        fatal("This script requires the MESI_Two_Level protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #
    l2_bits = int(math.log(options.num_l2caches, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))

    l2_index_start = block_size_bits + l2_bits

    for i in range(options.num_l2caches):
        #
        # First create the Ruby objects associated with this L2 cache
        #
        # [Cleanup Cache] Pass option to randomize L2, provide memory
        # footprint
        l2_cache = L2Cache(
            size=options.l2_size,
            assoc=options.l2_assoc,
            start_index_bit=l2_index_start,
            is_index_randomized=options.rand_L2,
            mem_footprint_bits=int(math.log(
                float(AddrRange(options.mem_size).end + 1), 2)),
            remap_accesses=100)

        l2_cntrl = L2Cache_Controller(version=i,
                                      L2cache=l2_cache,
                                      transitions_per_cycle=options.ports,
                                      ruby_system=ruby_system)

        # Attach under a per-index attribute name (l2_cntrl0, l2_cntrl1, ...)
        setattr(ruby_system, "l2_cntrl%d" % i, l2_cntrl)
        l2_cntrl_nodes.append(l2_cntrl)

        # Connect the L2 controllers and the network
        l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
        l2_cntrl.DirRequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
        l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
        l2_cntrl.responseFromL2Cache = MessageBuffer()
        l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

        l2_cntrl.unblockToL2Cache = MessageBuffer()
        l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master
        l2_cntrl.L1RequestToL2Cache = MessageBuffer()
        l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
        l2_cntrl.responseToL2Cache = MessageBuffer()
        l2_cntrl.responseToL2Cache.slave = ruby_system.network.master

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)

        # The data cache is built the same way in every configuration; only
        # the replacement policy is optionally overridden.
        l1d_params = dict(size=options.l1d_size,
                          assoc=options.l1d_assoc,
                          start_index_bit=block_size_bits,
                          is_icache=False,
                          tracks_sideeffects=options.l1d_mshr_tracking)
        if options.random_repl_L1:
            l1d_params['replacement_policy'] = RandomReplacementPolicy()
        elif options.LRU_repl_L1:
            l1d_params['replacement_policy'] = LRUReplacementPolicy()
        l1d_cache = L1Cache(**l1d_params)

        prefetcher = RubyPrefetcher.Prefetcher()

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        # NOTE(review): l2_cache here is whatever the L2 loop above left
        # behind, i.e. the last L2 cache created; with more than one L2
        # cache every L1 controller receives that same object -- confirm
        # this is intended.
        l1_cntrl = L1Cache_Controller(version=i,
                                      L1Icache=l1i_cache,
                                      L1Dcache=l1d_cache,
                                      L2cache=l2_cache,
                                      l2_select_num_bits=l2_bits,
                                      send_evictions=send_evicts(options),
                                      prefetcher=prefetcher,
                                      ruby_system=ruby_system,
                                      clk_domain=clk_domain,
                                      transitions_per_cycle=options.ports,
                                      enable_prefetch=options.enable_prefetch)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                clk_domain=clk_domain,
                                L2inv=options.L2Inv,
                                dcache_hit_latency=options.l1d_latency,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        setattr(ruby_system, "l1_cntrl%d" % i, l1_cntrl)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controllers and the network
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.requestFromL1Cache = MessageBuffer()
        l1_cntrl.requestFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.responseFromL1Cache = MessageBuffer()
        l1_cntrl.responseFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.unblockFromL1Cache = MessageBuffer()
        l1_cntrl.unblockFromL1Cache.master = ruby_system.network.slave

        l1_cntrl.optionalQueue = MessageBuffer()

        l1_cntrl.requestToL1Cache = MessageBuffer()
        l1_cntrl.requestToL1Cache.slave = ruby_system.network.master
        l1_cntrl.responseToL1Cache = MessageBuffer()
        l1_cntrl.responseToL1Cache.slave = ruby_system.network.master

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain,
        clk_divider=3)

    dir_cntrl_nodes = create_directories(options, system.mem_ranges,
                                         ruby_system)
    for dir_cntrl in dir_cntrl_nodes:
        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        # Create the Ruby objects associated with the dma controller
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i, dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        setattr(ruby_system, "dma_cntrl%d" % i, dma_cntrl)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave

    all_cntrls = (l1_cntrl_nodes +
                  l2_cntrl_nodes +
                  dir_cntrl_nodes +
                  dma_cntrl_nodes)

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the io controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]

    ruby_system.network.number_of_virtual_networks = 3
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, dir_cntrl_nodes, topology)