def config_mem(options, system):
    """
    Create the memory controllers based on the options and attach them.

    If requested, we make a multi-channel configuration of the
    selected memory controller class by creating multiple instances of
    the specific class. The individual controllers have their
    parameters set such that the address range is interleaved between
    them.
    """
    nbr_mem_ctrls = options.mem_channels
    import math
    from m5.util import fatal
    intlv_bits = int(math.log(nbr_mem_ctrls, 2))
    if 2 ** intlv_bits != nbr_mem_ctrls:
        fatal("Number of memory channels must be a power of 2")

    cls = get(options.mem_type)
    mem_ctrls = []

    # For every range (most systems will only have one), create an
    # array of controllers and set their parameters to match their
    # address mapping in the case of a DRAM
    for r in system.mem_ranges:
        for i in xrange(nbr_mem_ctrls):
            mem_ctrls.append(create_mem_ctrl(cls, r, i, nbr_mem_ctrls,
                                             intlv_bits,
                                             system.cache_line_size.value))

    system.mem_ctrls = mem_ctrls

    # Connect the controllers to the membus
    for i in xrange(len(system.mem_ctrls)):
        system.mem_ctrls[i].port = system.membus.master
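# A standalone sketch (not gem5 code) of the power-of-2 channel check used
# above. int(math.log(n, 2)) can be thrown off by floating-point rounding for
# large n, so an exact bit test is shown alongside it; the helper names are
# illustrative only.
import math

def is_power_of_2(n):
    # exact for arbitrarily large integers: 1, 2, 4, 8, ...
    return n > 0 and (n & (n - 1)) == 0

def intlv_bits_for(nbr_mem_ctrls):
    if not is_power_of_2(nbr_mem_ctrls):
        raise ValueError("Number of memory channels must be a power of 2")
    return int(math.log(nbr_mem_ctrls, 2))

assert intlv_bits_for(4) == 2
assert not is_power_of_2(6)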
def wrapper(url):
    from urlparse import parse_qs
    from ast import literal_eval

    qs = parse_qs(url.query, keep_blank_values=True)

    # parse_qs returns a list of values for each parameter. Only a
    # single value is allowed per parameter since kwargs can't carry
    # multiple values. Use literal_eval to transform string param
    # values into proper Python types.
    def parse_value(key, values):
        if len(values) == 0 or (len(values) == 1 and not values[0]):
            fatal("%s: '%s' doesn't have a value." % (url.geturl(), key))
        elif len(values) > 1:
            fatal("%s: '%s' has multiple values." % (url.geturl(), key))
        else:
            try:
                return key, literal_eval(values[0])
            except ValueError:
                fatal("%s: %s isn't a valid Python literal" \
                      % (url.geturl(), values[0]))

    kwargs = dict([ parse_value(k, v) for k, v in qs.items() ])

    try:
        return func("%s%s" % (url.netloc, url.path), **kwargs)
    except TypeError:
        fatal("Illegal stat visitor parameter specified")
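# Minimal, self-contained sketch of the query-string handling above (not tied
# to gem5): parse_qs maps each parameter to a *list* of values, and
# literal_eval turns the textual value into a real Python object. The example
# URL is made up.
from ast import literal_eval
try:
    from urllib.parse import urlsplit, parse_qs    # Python 3
except ImportError:
    from urlparse import urlsplit, parse_qs        # Python 2

url = urlsplit("stats://stats.txt?desc=False")
qs = parse_qs(url.query, keep_blank_values=True)
# qs == {'desc': ['False']}
kwargs = dict((k, literal_eval(v[0])) for k, v in qs.items())
assert kwargs == {'desc': False}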
def enable():
    '''Enable the statistics package.  Before the statistics package is
    enabled, all statistics must be created and initialized and once
    the package is enabled, no more statistics can be created.'''

    global stats_list
    stats_list = list(_m5.stats.statsList())

    for stat in stats_list:
        if not stat.check() or not stat.baseCheck():
            fatal("statistic '%s' (%d) was not properly initialized " \
                  "by a regStats() function\n", stat.name, stat.id)

        if not (stat.flags & flags.display):
            stat.name = "__Stat%06d" % stat.id

    def less(stat1, stat2):
        v1 = stat1.name.split('.')
        v2 = stat2.name.split('.')
        return v1 < v2

    stats_list.sort(less)

    for stat in stats_list:
        stats_dict[stat.name] = stat
        stat.enable()

    _m5.stats.enable()
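# Illustrative sketch (stat names are made up): comparing the dotted names as
# lists of components, as less() does above, orders the statistics level by
# level through the SimObject hierarchy rather than as flat strings.
names = ["system.membus.reqs",
         "system.cpu.icache.misses",
         "system.cpu.dcache.misses"]
for n in sorted(names, key=lambda name: name.split('.')):
    print(n)
# system.cpu.dcache.misses
# system.cpu.icache.misses
# system.membus.reqs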
def config_mem(options, system):
    """
    Create the memory controllers based on the options and attach them.

    If requested, we make a multi-channel configuration of the
    selected memory controller class by creating multiple instances of
    the specific class. The individual controllers have their
    parameters set such that the address range is interleaved between
    them.
    """

    if options.external_memory_system:
        system.external_memory = m5.objects.ExternalSlave(
            port_type=options.external_memory_system,
            port_data="init_mem0", port=system.membus.master,
            addr_ranges=system.mem_ranges)
        system.kernel_addr_check = False
        return

    nbr_mem_ctrls = options.mem_channels
    import math
    from m5.util import fatal
    intlv_bits = int(math.log(nbr_mem_ctrls, 2))
    if 2 ** intlv_bits != nbr_mem_ctrls:
        fatal("Number of memory channels must be a power of 2")

    cls = get(options.mem_type)
    mem_ctrls = []

    # The default behaviour is to interleave memory channels on 128
    # byte granularity, or cache line granularity if larger than 128
    # byte. This value is based on the locality seen across a large
    # range of workloads.
    intlv_size = max(128, system.cache_line_size.value)

    # For every range (most systems will only have one), create an
    # array of controllers and set their parameters to match their
    # address mapping in the case of a DRAM
    for r in system.mem_ranges:
        print "{} {}".format("system mem range:", r)
        print "{} {}".format("# of mem ctrls:", nbr_mem_ctrls)
        for i in xrange(nbr_mem_ctrls):
            mem_ctrl = create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits,
                                       intlv_size)
            # Set the number of ranks based on the command-line
            # options if it was explicitly set
            if issubclass(cls, m5.objects.DRAMCtrl) and options.mem_ranks:
                mem_ctrl.ranks_per_channel = options.mem_ranks
            mem_ctrls.append(mem_ctrl)

    system.mem_ctrls = mem_ctrls
    print "{} {}".format("# of mem channels:", len(mem_ctrls))

    # Connect the controllers to the membus
    for i in xrange(len(system.mem_ctrls)):
        system.mem_ctrls[i].port = system.membus.master
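# Rough sketch in plain Python (not gem5's AddrRange logic) of what a
# 128-byte interleave across a power-of-2 number of channels means: the
# channel index comes from the address bits just above the interleave
# granularity, so consecutive 128-byte chunks rotate across the channels.
def channel_for(addr, nbr_mem_ctrls, intlv_size=128):
    return (addr // intlv_size) % nbr_mem_ctrls

assert [channel_for(a, 4) for a in (0, 128, 256, 384, 512)] == [0, 1, 2, 3, 0]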
def addStatVisitor(url):
    """Add a stat visitor specified using a URL string

    Stat visitors are specified using URLs on the following format:
    format://path[?param=value[;param=value]]

    The available formats are listed in the factories list. Factories
    are called with the path as the first positional parameter and the
    parameters are keyword arguments. Parameter values must be valid
    Python literals.
    """

    try:
        from urllib.parse import urlsplit
    except ImportError:
        # Python 2 fallback
        from urlparse import urlsplit

    parsed = urlsplit(url)

    try:
        factory = factories[parsed.scheme]
    except KeyError:
        fatal("Illegal stat file type specified.")

    outputList.append(factory(parsed))
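# Hedged sketch of the scheme-to-factory dispatch described in the docstring;
# the schemes and factory lambdas below are placeholders, not gem5's real
# factories list.
try:
    from urllib.parse import urlsplit    # Python 3
except ImportError:
    from urlparse import urlsplit        # Python 2

factories = {
    'text': lambda parsed: ('text-writer', parsed.netloc + parsed.path),
    'h5':   lambda parsed: ('hdf5-writer', parsed.netloc + parsed.path),
}

parsed = urlsplit("text://stats.txt")
assert factories[parsed.scheme](parsed) == ('text-writer', 'stats.txt')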
def enable():
    '''Enable the statistics package.  Before the statistics package is
    enabled, all statistics must be created and initialized and once
    the package is enabled, no more statistics can be created.'''

    __dynamic_cast = []
    for k, v in internal.stats.__dict__.iteritems():
        if k.startswith('dynamic_'):
            __dynamic_cast.append(v)

    for stat in internal.stats.statsList():
        for cast in __dynamic_cast:
            val = cast(stat)
            if val is not None:
                stats_list.append(val)
                raw_stats_list.append(val)
                break
        else:
            fatal("unknown stat type %s", stat)

    for stat in stats_list:
        if not stat.check() or not stat.baseCheck():
            fatal("stat check failed for '%s' %d\n", stat.name, stat.id)
        if not (stat.flags & flags.display):
            stat.name = "__Stat%06d" % stat.id

    def less(stat1, stat2):
        v1 = stat1.name.split('.')
        v2 = stat2.name.split('.')
        return v1 < v2

    stats_list.sort(less)

    for stat in stats_list:
        stats_dict[stat.name] = stat
        stat.enable()
def makeMultiChannel(cls, nbr_mem_ctrls, mem_start_addr, mem_size,
                     intlv_high_bit = 11):
    """
    Make a multi-channel configuration of this class.

    Create multiple instances of the specific class and set their
    parameters such that the address range is interleaved between
    them.

    Returns a list of controllers.
    """
    import math
    from m5.util import fatal
    intlv_bits = int(math.log(nbr_mem_ctrls, 2))
    if 2 ** intlv_bits != nbr_mem_ctrls:
        fatal("Number of memory channels must be a power of 2")
    mem_ctrls = []
    for i in xrange(nbr_mem_ctrls):
        # The default interleaving granularity is tuned to match a
        # row buffer size of 32 cache lines of 64 bytes (starting
        # at bit 11 for 2048 bytes). There is unfortunately no
        # good way of checking this at instantiation time.
        mem_ctrls.append(cls(range = AddrRange(mem_start_addr,
                                               size = mem_size,
                                               intlvHighBit = intlv_high_bit,
                                               intlvBits = intlv_bits,
                                               intlvMatch = i),
                             channels = nbr_mem_ctrls))
    return mem_ctrls
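# Conceptual sketch in plain Python, not gem5's AddrRange implementation:
# assuming the channel-select field is the intlv_bits-wide group of address
# bits whose lowest bit corresponds to the interleave granularity (2^11 =
# 2048 bytes here), a controller with match value m serves the chunks whose
# select field equals m. The bit positions are an assumption for illustration.
def serves(addr, match, intlv_bits, granularity_bit=11):
    select = (addr >> granularity_bit) & ((1 << intlv_bits) - 1)
    return select == match

# 2048-byte chunks with 4 channels rotate 0, 1, 2, 3, 0, ...
assert serves(3 * 2048, match=3, intlv_bits=2)
assert serves(4 * 2048, match=0, intlv_bits=2)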
def createGPU(options, gpu_mem_range): # DEPRECATED: Set a default GPU DRAM clock to be passed to the wrapper. # This must be eliminated when the wrapper can be removed. options.gpu_dram_clock = None gpgpusimOptions = parseGpgpusimConfig(options) # The GPU's clock domain is a source for all of the components within the # GPU. By making it a SrcClkDomain, it can be directly referenced to change # the GPU clock frequency dynamically. gpu = CudaGPU(warp_size = options.gpu_warp_size, manage_gpu_memory = options.split, clk_domain = SrcClockDomain(clock = options.gpu_core_clock, voltage_domain = VoltageDomain()), gpu_memory_range = gpu_mem_range) gpu.cores_wrapper = GPGPUSimComponentWrapper(clk_domain = gpu.clk_domain) gpu.icnt_wrapper = GPGPUSimComponentWrapper(clk_domain = DerivedClockDomain( clk_domain = gpu.clk_domain, clk_divider = 2)) gpu.l2_wrapper = GPGPUSimComponentWrapper(clk_domain = gpu.clk_domain) gpu.dram_wrapper = GPGPUSimComponentWrapper( clk_domain = SrcClockDomain( clock = options.gpu_dram_clock, voltage_domain = gpu.clk_domain.voltage_domain)) warps_per_core = options.gpu_threads_per_core / options.gpu_warp_size gpu.shader_cores = [CudaCore(id = i, warp_contexts = warps_per_core) for i in xrange(options.num_sc)] gpu.ce = GPUCopyEngine(driver_delay = 5000000) for sc in gpu.shader_cores: sc.lsq = ShaderLSQ() sc.lsq.data_tlb.entries = options.gpu_tlb_entries sc.lsq.forward_flush = (buildEnv['PROTOCOL'] == 'VI_hammer_fusion' \ and options.flush_kernel_end) sc.lsq.warp_size = options.gpu_warp_size sc.lsq.cache_line_size = options.cacheline_size # sc.lsq.request_buffer_depth = options.gpu_l1_buf_depth if options.gpu_threads_per_core % options.gpu_warp_size: fatal("gpu_warp_size must divide gpu_threads_per_core evenly.") sc.lsq.warp_contexts = warps_per_core # This is a stop-gap solution until we implement a better way to register device memory if options.access_host_pagetable: for sc in gpu.shader_cores: sc.itb.access_host_pagetable = True sc.lsq.data_tlb.access_host_pagetable = True gpu.ce.device_dtb.access_host_pagetable = True gpu.ce.host_dtb.access_host_pagetable = True gpu.shared_mem_delay = options.shMemDelay gpu.config_path = gpgpusimOptions gpu.dump_kernel_stats = options.kernel_stats return gpu
def parseGpgpusimConfig(options): # parse gpgpu config file # First check the cwd, and if there is not a gpgpusim.config file there # Use the template found in gem5-fusion/configs/gpu_config and fill in # the missing information with command line options. if options.gpgpusim_config: usingTemplate = False gpgpusimconfig = options.gpgpusim_config else: gpgpusimconfig = os.path.join(os.path.dirname(__file__), 'gpu_config/gpgpusim.config.template') usingTemplate = True if not os.path.isfile(gpgpusimconfig): fatal("Unable to find gpgpusim config (%s)" % gpgpusimconfig) f = open(gpgpusimconfig, 'r') config = f.read() f.close() if usingTemplate: print "Using template and command line options for gpgpusim.config" config = config.replace("%clusters%", str(options.clusters)) config = config.replace("%cores_per_cluster%", str(options.cores_per_cluster)) config = config.replace("%ctas_per_shader%", str(options.ctas_per_shader)) config = config.replace("%icnt_file%", os.path.join(os.path.dirname(__file__), "gpu_config/icnt_config_fermi_islip.txt")) config = config.replace("%warp_size%", str(options.gpu_warp_size)) # GPGPU-Sim config expects freq in MHz config = config.replace("%freq%", str(toFrequency(options.gpu_core_clock) / 1.0e6)) config = config.replace("%threads_per_sm%", str(options.gpu_threads_per_core)) options.num_sc = options.clusters*options.cores_per_cluster f = open(m5.options.outdir + '/gpgpusim.config', 'w') f.write(config) f.close() gpgpusimconfig = m5.options.outdir + '/gpgpusim.config' else: print "Using gpgpusim.config for clusters, cores_per_cluster, Frequency, warp size" start = config.find("-gpgpu_n_clusters ") + len("-gpgpu_n_clusters ") end = config.find('-', start) gpgpu_n_clusters = int(config[start:end]) start = config.find("-gpgpu_n_cores_per_cluster ") + len("-gpgpu_n_cores_per_cluster ") end = config.find('-', start) gpgpu_n_cores_per_cluster = int(config[start:end]) num_sc = gpgpu_n_clusters * gpgpu_n_cores_per_cluster options.num_sc = num_sc start = config.find("-gpgpu_clock_domains ") + len("-gpgpu_clock_domains ") end = config.find(':', start) options.gpu_core_clock = config[start:end] + "MHz" start = config.find('-gpgpu_shader_core_pipeline ') + len('-gpgpu_shader_core_pipeline ') start = config.find(':', start) + 1 end = config.find('\n', start) options.gpu_warp_size = int(config[start:end]) if options.pwc_size == "0": # Bypass the shared L1 cache options.gpu_tlb_bypass_l1 = True else: # Do not bypass the page walk cache options.gpu_tlb_bypass_l1 = False return gpgpusimconfig
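# Small sketch of the "%placeholder%" substitution used to fill in the
# gpgpusim.config template above; the template text and values here are
# invented for illustration only.
template = ("-gpgpu_n_clusters %clusters%\n"
            "-gpgpu_shader_core_pipeline %threads_per_sm%:32\n")
values = {"clusters": 16, "threads_per_sm": 1536}

config = template
for key, val in values.items():
    config = config.replace("%" + key + "%", str(val))

assert "-gpgpu_n_clusters 16" in config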
def get(name):
    """Get a platform class from a user provided class name."""

    real_name = _platform_aliases.get(name, name)

    try:
        return _platform_classes[real_name]
    except KeyError:
        fatal("%s is not a valid Platform model." % (name,))
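# Hedged, self-contained sketch of the alias-then-lookup pattern above; the
# alias and class tables are stand-ins, not gem5's real platform registries.
_platform_aliases = {"RealView": "VExpress_GEM5_V1"}
_platform_classes = {"VExpress_GEM5_V1": object}

def lookup(name):
    real_name = _platform_aliases.get(name, name)
    try:
        return _platform_classes[real_name]
    except KeyError:
        raise ValueError("%s is not a valid Platform model." % (name,))

assert lookup("RealView") is lookup("VExpress_GEM5_V1")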
def build_pardg5v_system(np): if buildEnv['TARGET_ISA'] == "x86": pardsys = makePARDg5VSystem(test_mem_mode, options.num_cpus, bm[0]) else: fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA']) # Set the cache line size for the entire system pardsys.cache_line_size = options.cacheline_size # Create a top-level voltage domain pardsys.voltage_domain = VoltageDomain(voltage = options.sys_voltage) # Create a source clock for the system and set the clock period pardsys.clk_domain = SrcClockDomain(clock = options.sys_clock, voltage_domain = pardsys.voltage_domain) # Create a CPU voltage domain pardsys.cpu_voltage_domain = VoltageDomain() # Create a source clock for the CPUs and set the clock period pardsys.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock, voltage_domain = pardsys.cpu_voltage_domain) if options.kernel is not None: pardsys.kernel = binary(options.kernel) if options.script is not None: pardsys.readfile = options.script pardsys.init_param = options.init_param # For now, assign all the CPUs to the same clock domain pardsys.cpu = [TestCPUClass(clk_domain=pardsys.cpu_clk_domain, cpu_id=i) for i in xrange(np)] if options.caches or options.l2cache: # By default the IOCache runs at the system clock pardsys.iocache = IOCache(addr_ranges = [AddrRange('3GB'), AddrRange(start='4GB', size='4GB')]) pardsys.iocache.cpu_side = pardsys.iobus.master pardsys.iocache.mem_side = pardsys.membus.slave else: pardsys.iobridge = Bridge(delay='50ns', ranges = [AddrRange('3GB'), AddrRange(start='4GB', size='4GB')]) pardsys.iobridge.slave = pardsys.iobus.master pardsys.iobridge.master = pardsys.membus.slave for i in xrange(np): pardsys.cpu[i].createThreads() CacheConfig.config_cache(options, pardsys) XMemConfig.config_mem(options, pardsys) return pardsys
def config_mem(options, system, domain):
    """
    Create the memory controllers based on the options and attach them.

    If requested, we make a multi-channel configuration of the
    selected memory controller class by creating multiple instances of
    the specific class. The individual controllers have their
    parameters set such that the address range is interleaved between
    them.
    """
    nbr_mem_ctrls = options.mem_channels
    import math
    from m5.util import fatal
    intlv_bits = int(math.log(nbr_mem_ctrls, 2))
    if 2 ** intlv_bits != nbr_mem_ctrls:
        fatal("Number of memory channels must be a power of 2")

    cls = get(options.mem_type)
    mem_ctrls = []

    # The default behaviour is to interleave memory channels on 128
    # byte granularity, or cache line granularity if larger than 128
    # byte. This value is based on the locality seen across a large
    # range of workloads.
    intlv_size = max(128, system.cache_line_size.value)

    # For every range (most systems will only have one), create an
    # array of controllers and set their parameters to match their
    # address mapping in the case of a DRAM
    print_addr_ranges('domain#' + str(domain.id) + '.mem_ranges: ',
                      domain.mem_ranges)
    for r in domain.mem_ranges:
        for i in xrange(nbr_mem_ctrls):
            mem_ctrl = create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits,
                                       intlv_size)
            # Set the number of ranks based on the command-line
            # options if it was explicitly set
            if issubclass(cls, m5.objects.DRAMCtrl) and options.mem_ranks:
                mem_ctrl.ranks_per_channel = options.mem_ranks
            mem_ctrls.append(mem_ctrl)

    domain.mem_ctrls = mem_ctrls

    # Connect the controllers to the membus
    for i in xrange(len(domain.mem_ctrls)):
        domain.mem_ctrls[i].port = domain.membus.master
def __new__(cls, **kwargs):
    if Root._the_instance:
        fatal("Attempt to allocate multiple instances of Root.")
        return None

    # first call: allocate the unique instance
    #
    # If SimObject ever implements __new__, we may want to pass
    # kwargs here, but for now this goes straight to
    # object.__new__ which prints an ugly warning if you pass it
    # args.  Seems like a bad design but that's the way it is.
    Root._the_instance = SimObject.__new__(cls)
    return Root._the_instance
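# Generic sketch of the singleton-via-__new__ idiom that Root relies on,
# written against plain object rather than SimObject; the Singleton class is
# purely illustrative.
class Singleton(object):
    _the_instance = None

    def __new__(cls, **kwargs):
        if Singleton._the_instance:
            raise RuntimeError("Attempt to allocate multiple instances.")
        Singleton._the_instance = super(Singleton, cls).__new__(cls)
        return Singleton._the_instance

a = Singleton()
try:
    b = Singleton()
except RuntimeError:
    pass    # the second allocation is rejected, as with Root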
def config_mem(options, system):
    """
    Create the memory controllers based on the options and attach them.

    If requested, we make a multi-channel configuration of the
    selected memory controller class by creating multiple instances of
    the specific class. The individual controllers have their
    parameters set such that the address range is interleaved between
    them.
    """
    nbr_mem_ctrls = options.mem_channels
    import math
    from m5.util import fatal
    intlv_bits = int(math.log(nbr_mem_ctrls, 2))
    if 2 ** intlv_bits != nbr_mem_ctrls:
        fatal("Number of memory channels must be a power of 2")

    cls = get(options.mem_type)
    mem_ctrls = []

    # Instead of one controller per memory range, create a single
    # controller covering the whole memory (plus 1GB of headroom) and
    # wrap it in a PARDMemoryCtrl attached to the membus memory port.
    from m5.objects import AddrRange, PARDMemoryCtrl
    from m5.util import convert

    mem_size = convert.toMemorySize(options.mem_size) + \
        convert.toMemorySize('1GB')
    mem_ctrls.append(create_mem_ctrl(cls, AddrRange(0, size = mem_size),
                                     0, 1, intlv_bits,
                                     system.cache_line_size.value))

    system.mem_ctrl = PARDMemoryCtrl(memories=mem_ctrls[0])
    system.mem_ctrl.port = system.membus.memory_port
    system.mem_ctrl.attachDRAM()
def getTestFilename(test_location):
    file_chop_index = test_location.find('tests/')
    if file_chop_index <= 0:
        fatal('test_filename lacks \'tests\/\' substring')
    test_filename = test_location[file_chop_index:]
    test_filename = test_filename.replace('/opt/', '/')
    test_filename = test_filename.replace('/debug/', '/')
    test_filename = test_filename.replace('/fast/', '/')
    print test_filename

    supported_isas = [ 'arm', 'x86' ]
    isa = None
    for test_isa in supported_isas:
        if test_isa in test_filename:
            isa = test_isa
            break
    if not isa:
        fatal('ISA not found in test: %s' % test_filename)

    file_chop_index = test_filename.find('%s/' % isa)
    if file_chop_index >= len(test_filename):
        fatal('test_filename lacks \'%s\/\' substring' % isa)
    test_filename = test_filename[:file_chop_index]
    test_filename = os.path.join(test_filename, 'test.py')

    if not os.path.exists(test_filename):
        fatal('Could not find test script: \'%s\'' % test_filename)

    return test_filename
def connectGPUPorts(gpu, ruby, options): for i,sc in enumerate(gpu.shader_cores): sc.inst_port = ruby._cpu_ports[options.num_cpus+i].slave for j in xrange(options.gpu_warp_size): sc.lsq_port[j] = sc.lsq.lane_port[j] sc.lsq.cache_port = ruby._cpu_ports[options.num_cpus+i].slave sc.lsq_ctrl_port = sc.lsq.control_port # The total number of sequencers is equal to the number of CPU cores, plus # the number of GPU cores plus any pagewalk caches and the copy engine # caches. Currently, for unified address space architectures, there is one # pagewalk cache and one copy engine cache (2 total), and the pagewalk cache # is indexed first. For split address space architectures, there are 2 copy # engine caches, and the host-side cache is indexed before the device-side. assert(len(ruby._cpu_ports) == options.num_cpus + options.num_sc + 2) # Initialize the MMU, connecting it to either the pagewalk cache port for # unified address space, or the copy engine's host-side sequencer port for # split address space architectures. gpu.shader_mmu.setUpPagewalkers(32, ruby._cpu_ports[options.num_cpus+options.num_sc].slave, options.gpu_tlb_bypass_l1) if options.split: # NOTE: In split address space architectures, the MMU only provides the # copy engine host-side TLB access to a page walker. This should # probably be changed so that the copy engine doesn't manage # translations, but only the data handling # If inappropriately used, crash to inform MMU config problems to user: if options.access_host_pagetable: fatal('Cannot access host pagetable from the GPU or the copy ' \ 'engine\'s GPU-side port\n in split address space. Use ' \ 'only one of --split or --access-host-pagetable') # Tie copy engine ports to appropriate sequencers gpu.ce.host_port = \ ruby._cpu_ports[options.num_cpus+options.num_sc].slave gpu.ce.device_port = \ ruby._cpu_ports[options.num_cpus+options.num_sc+1].slave gpu.ce.device_dtb.access_host_pagetable = False else: # With a unified address space, tie both copy engine ports to the same # copy engine controller. NOTE: The copy engine is often unused in the # unified address space gpu.ce.host_port = \ ruby._cpu_ports[options.num_cpus+options.num_sc+1].slave gpu.ce.device_port = \ ruby._cpu_ports[options.num_cpus+options.num_sc+1].slave
def configureMemorySpaces(options):
    total_mem_range = AddrRange(options.total_mem_size)
    cpu_mem_range = total_mem_range
    gpu_mem_range = total_mem_range

    if options.split:
        buildEnv['PROTOCOL'] += '_split'
        total_mem_size = total_mem_range.size()
        gpu_mem_range = AddrRange(options.gpu_mem_size)
        if gpu_mem_range.size() >= total_mem_size:
            fatal("GPU memory size (%s) won't fit within total memory size (%s)!"
                  % (options.gpu_mem_size, options.total_mem_size))
        gpu_segment_base_addr = Addr(total_mem_size - gpu_mem_range.size())
        gpu_mem_range = AddrRange(gpu_segment_base_addr,
                                  size = options.gpu_mem_size)
        options.total_mem_size = long(gpu_segment_base_addr)
        cpu_mem_range = AddrRange(options.total_mem_size)
    else:
        buildEnv['PROTOCOL'] += '_fusion'

    return (cpu_mem_range, gpu_mem_range)
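# Worked example with plain integers of the split-address-space layout above:
# the GPU segment is carved out of the top of the total memory range and the
# CPU keeps everything below it. The sizes are arbitrary.
total_mem_size = 4 * 1024 ** 3      # 4 GiB
gpu_mem_size = 1 * 1024 ** 3        # 1 GiB
assert gpu_mem_size < total_mem_size

gpu_segment_base_addr = total_mem_size - gpu_mem_size
cpu_mem_size = gpu_segment_base_addr          # CPU range is [0, base)
assert cpu_mem_size + gpu_mem_size == total_mem_size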
def setUpPagewalkers(self, num, port, bypass_l1):
    tlbs = []
    for i in range(num):
        # set to only a single entry here so that all requests are misses
        if buildEnv['TARGET_ISA'] == 'x86':
            from X86TLB import X86TLB
            t = X86TLB(size=1)
            t.walker.bypass_l1 = bypass_l1
        elif buildEnv['TARGET_ISA'] == 'arm':
            from ArmTLB import ArmTLB
            t = ArmTLB(size=1)
            # ArmTLB does not yet include bypass_l1 option
        else:
            fatal('ShaderMMU only supports x86 and ARM architectures ' \
                  'currently')
        t.walker.port = port
        tlbs.append(t)
    self.pagewalkers = tlbs
def createGPU(options, gpu_mem_range): gpgpusimOptions = parseGpgpusimConfig(options) gpu = CudaGPU(manage_gpu_memory = options.split, gpu_memory_range = gpu_mem_range) gpu.shader_cores = [CudaCore(id = i) for i in xrange(options.num_sc)] gpu.ce = GPUCopyEngine(driver_delay = 5000000) gpu.voltage_domain = VoltageDomain() gpu.clk_domain = SrcClockDomain(clock = options.gpu_core_clock, voltage_domain = gpu.voltage_domain) gpu.warp_size = options.gpu_warp_size for sc in gpu.shader_cores: sc.lsq = ShaderLSQ() sc.lsq.data_tlb.entries = options.gpu_tlb_entries print "Adding tlbid to all the Shader TLBs" sc.lsq.data_tlb.tlbid = sc.id #Sharmila sc.lsq.forward_flush = (buildEnv['PROTOCOL'] == 'VI_hammer_fusion' \ and options.flush_kernel_end) sc.lsq.warp_size = options.gpu_warp_size sc.lsq.cache_line_size = options.cacheline_size # sc.lsq.request_buffer_depth = options.gpu_l1_buf_depth if options.gpu_threads_per_core % options.gpu_warp_size: fatal("gpu_warp_size must divide gpu_threads_per_core evenly.") sc.lsq.warp_contexts = options.gpu_threads_per_core / options.gpu_warp_size # This is a stop-gap solution until we implement a better way to register device memory if options.access_host_pagetable: for sc in gpu.shader_cores: sc.itb.access_host_pagetable = True sc.lsq.data_tlb.access_host_pagetable = True gpu.ce.device_dtb.access_host_pagetable = True gpu.ce.host_dtb.access_host_pagetable = True gpu.shared_mem_delay = options.shMemDelay gpu.config_path = gpgpusimOptions gpu.dump_kernel_stats = options.kernel_stats return gpu
def parse_value(key, values):
    if len(values) == 0 or (len(values) == 1 and not values[0]):
        fatal("%s: '%s' doesn't have a value." % (url.geturl(), key))
    elif len(values) > 1:
        fatal("%s: '%s' has multiple values." % (url.geturl(), key))
    else:
        try:
            return key, literal_eval(values[0])
        except ValueError:
            fatal("%s: %s isn't a valid Python literal" \
                  % (url.geturl(), values[0]))
np = options.num_cpus

system = System(cpu = [CPUClass(cpu_id=i) for i in xrange(np)],
                physmem = SimpleMemory(in_addr_map = True,
                                       range = AddrRange(options.mem_size)))

# Sanity check
if options.fastmem and (options.caches or options.l2cache):
    fatal("You cannot use fastmem in combination with caches!")

# Added by Tianyun for identifying a debbie run from a gem5 run
system.debbie = 0  # options.debbie

for i in xrange(np):
    if len(multiprocesses) == 1:
        system.cpu[i].workload = multiprocesses[0]
    else:
        system.cpu[i].workload = multiprocesses[i]
    atomic = True
    CPUClass = None
    test_mem_mode = 'atomic'
    if not atomic:
        test_mem_mode = 'timing'
    return (TmpClass, test_mem_mode, CPUClass)

#######################################################################
#
# Check that we are running on a full-system arm simulator

if not buildEnv['TARGET_ISA'] == "arm":
    fatal("Expected TARGET_ISA == arm")

#######################################################################
#
# Set up basic configuration options

parser = optparse.OptionParser()
parser.add_option("--kernel", action="store", type="string")
parser.add_option("--ramdisk", action="store", type="string")
parser.add_option("-n", "--num_cpus", type="int", default=1)
parser.add_option("--cpu-type", type="choice", default="atomic",
                  choices = ["atomic", "arm_detailed"],
                  help = "type of cpu to run with")
parser.add_option("--caches", action="store_true")
parser.add_option("--l2cache", action="store_true")
parser.add_option("--l1d_size", type="string", default="64kB")
def build_test_system(np): cmdline = cmd_line_template() if buildEnv['TARGET_ISA'] == "alpha": test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0], options.ruby, cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "mips": test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0], cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "sparc": test_sys = makeSparcSystem(test_mem_mode, bm[0], cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "x86": test_sys = makeLinuxX86System(test_mem_mode, np, bm[0], options.ruby, cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "arm": test_sys = makeArmSystem( test_mem_mode, options.machine_type, np, bm[0], options.dtb_filename, bare_metal=options.bare_metal, cmdline=cmdline, external_memory=options.external_memory_system, ruby=options.ruby, security=options.enable_security_extensions) if options.enable_context_switch_stats_dump: test_sys.enable_context_switch_stats_dump = True else: fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA']) # Set the cache line size for the entire system test_sys.cache_line_size = options.cacheline_size # Create a top-level voltage domain test_sys.voltage_domain = VoltageDomain(voltage=options.sys_voltage) # Create a source clock for the system and set the clock period test_sys.clk_domain = SrcClockDomain( clock=options.sys_clock, voltage_domain=test_sys.voltage_domain) # Create a CPU voltage domain test_sys.cpu_voltage_domain = VoltageDomain() # Create a source clock for the CPUs and set the clock period test_sys.cpu_clk_domain = SrcClockDomain( clock=options.cpu_clock, voltage_domain=test_sys.cpu_voltage_domain) if options.kernel is not None: test_sys.kernel = binary(options.kernel) else: print("Error: a kernel must be provided to run in full system mode") sys.exit(1) if options.script is not None: test_sys.readfile = options.script if options.lpae: test_sys.have_lpae = True if options.virtualisation: test_sys.have_virtualization = True test_sys.init_param = options.init_param # For now, assign all the CPUs to the same clock domain test_sys.cpu = [ TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i) for i in range(np) ] if ObjectList.is_kvm_cpu(TestCPUClass) or \ ObjectList.is_kvm_cpu(FutureClass): test_sys.kvm_vm = KvmVM() if options.ruby: bootmem = getattr(test_sys, '_bootmem', None) Ruby.create_system(options, True, test_sys, test_sys.iobus, test_sys._dma_ports, bootmem) # Create a seperate clock domain for Ruby test_sys.ruby.clk_domain = SrcClockDomain( clock=options.ruby_clock, voltage_domain=test_sys.voltage_domain) # Connect the ruby io port to the PIO bus, # assuming that there is just one such port. 
test_sys.iobus.master = test_sys.ruby._io_port.slave for (i, cpu) in enumerate(test_sys.cpu): # # Tie the cpu ports to the correct ruby system ports # cpu.clk_domain = test_sys.cpu_clk_domain cpu.createThreads() cpu.createInterruptController() cpu.icache_port = test_sys.ruby._cpu_ports[i].slave cpu.dcache_port = test_sys.ruby._cpu_ports[i].slave if buildEnv['TARGET_ISA'] in ("x86", "arm"): cpu.itb.walker.port = test_sys.ruby._cpu_ports[i].slave cpu.dtb.walker.port = test_sys.ruby._cpu_ports[i].slave if buildEnv['TARGET_ISA'] in "x86": cpu.interrupts[0].pio = test_sys.ruby._cpu_ports[i].master cpu.interrupts[0].int_master = test_sys.ruby._cpu_ports[ i].slave cpu.interrupts[0].int_slave = test_sys.ruby._cpu_ports[ i].master else: if options.caches or options.l2cache: # By default the IOCache runs at the system clock test_sys.iocache = IOCache(addr_ranges=test_sys.mem_ranges) test_sys.iocache.cpu_side = test_sys.iobus.master test_sys.iocache.mem_side = test_sys.membus.slave elif not options.external_memory_system: test_sys.iobridge = Bridge(delay='50ns', ranges=test_sys.mem_ranges) test_sys.iobridge.slave = test_sys.iobus.master test_sys.iobridge.master = test_sys.membus.slave # Sanity check if options.simpoint_profile: if not ObjectList.is_noncaching_cpu(TestCPUClass): fatal("SimPoint generation should be done with atomic cpu") if np > 1: fatal( "SimPoint generation not supported with more than one CPUs" ) for i in range(np): if options.simpoint_profile: test_sys.cpu[i].addSimPointProbe(options.simpoint_interval) if options.checker: test_sys.cpu[i].addCheckerCpu() if not ObjectList.is_kvm_cpu(TestCPUClass): if options.bp_type: bpClass = ObjectList.bp_list.get(options.bp_type) test_sys.cpu[i].branchPred = bpClass() if options.indirect_bp_type: IndirectBPClass = ObjectList.indirect_bp_list.get( options.indirect_bp_type) test_sys.cpu[i].branchPred.indirectBranchPred = \ IndirectBPClass() test_sys.cpu[i].createThreads() # If elastic tracing is enabled when not restoring from checkpoint and # when not fast forwarding using the atomic cpu, then check that the # TestCPUClass is DerivO3CPU or inherits from DerivO3CPU. If the check # passes then attach the elastic trace probe. # If restoring from checkpoint or fast forwarding, the code that does this for # FutureCPUClass is in the Simulation module. If the check passes then the # elastic trace probe is attached to the switch CPUs. if options.elastic_trace_en and options.checkpoint_restore == None and \ not options.fast_forward: CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, options) CacheConfig.config_cache(options, test_sys) MemConfig.config_mem(options, test_sys) return test_sys
print >>sys.stderr, "Unable to find workload for %s: %s" % ( buildEnv['TARGET_ISA'], app) sys.exit(1) elif options.cmd: multiprocesses, numThreads = get_processes(options) else: print >> sys.stderr, "No workload specified. Exiting!\n" sys.exit(1) (CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options) CPUClass.numThreads = numThreads # Check -- do not allow SMT with multiple CPUs if options.smt and options.num_cpus > 1: fatal("You cannot use SMT with multiple CPUs!") np = options.num_cpus system = System(cpu = [CPUClass(cpu_id=i) for i in xrange(np)], mem_mode = test_mem_mode, mem_ranges = [AddrRange(options.mem_size)], cache_line_size = options.cacheline_size) # Create a top-level voltage domain system.voltage_domain = VoltageDomain(voltage = options.sys_voltage) # Create a source clock for the system and set the clock period system.clk_domain = SrcClockDomain(clock = options.sys_clock, voltage_domain = system.voltage_domain) # Create a CPU voltage domain
def setup_memory_controllers(system, ruby, dir_cntrls, options): ruby.block_size_bytes = options.cacheline_size ruby.memory_size_bits = 48 block_size_bits = int(math.log(options.cacheline_size, 2)) if options.numa_high_bit: numa_bit = options.numa_high_bit else: # if the numa_bit is not specified, set the directory bits as the # lowest bits above the block offset bits, and the numa_bit as the # highest of those directory bits dir_bits = int(math.log(options.num_dirs, 2)) numa_bit = block_size_bits + dir_bits - 1 index = 0 mem_ctrls = [] crossbars = [] # Sets bits to be used for interleaving. Creates memory controllers # attached to a directory controller. A separate controller is created # for each address range as the abstract memory can handle only one # contiguous address range as of now. for dir_cntrl in dir_cntrls: # Create 1 instance of DRAMCache per directory controller if options.dramcache: dramcache_ctrl = MemConfig.create_dramcache_ctrl( MemConfig.get_cache(options.dramcache_type), system.mem_ranges[0], index, options.num_dirs, options.dramcache_size, options.dramcache_assoc, options.dramcache_block_size, options.num_cpus, options.dramcache_timing) mem_ctrls.append(dramcache_ctrl) dir_cntrl.memory = dramcache_ctrl.port dir_cntrl.directory.numa_high_bit = numa_bit crossbar = None if len(system.mem_ranges) > 1: # we dont support this fatal("system mem_ranges greater than 1") crossbar = IOXBar() crossbars.append(crossbar) if options.dramcache: dramcache_ctrl.dramcache_masterport = crossbar.slave else: dir_cntrl.memory = crossbar.slave for r in system.mem_ranges: # if dramcache exists interleave at dramcache_block_size if options.dramcache: mem_ctrl = MemConfig.create_mem_ctrl( MemConfig.get(options.mem_type), r, index, options.num_dirs, int(math.log(options.num_dirs, 2)), options.dramcache_block_size) else: mem_ctrl = MemConfig.create_mem_ctrl( MemConfig.get(options.mem_type), r, index, options.num_dirs, int(math.log(options.num_dirs, 2)), options.cacheline_size) mem_ctrls.append(mem_ctrl) if crossbar != None: mem_ctrl.port = crossbar.master else: if options.dramcache: mem_ctrl.port = dramcache_ctrl.dramcache_masterport else: mem_ctrl.port = dir_cntrl.memory index += 1 system.mem_ctrls = mem_ctrls if len(crossbars) > 0: ruby.crossbars = crossbars
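# Worked example of the directory/NUMA bit selection above when no
# numa_high_bit option is given: with a 64-byte cache line and 4 directories,
# the directory-select bits sit directly above the block offset bits.
import math

cacheline_size = 64
num_dirs = 4

block_size_bits = int(math.log(cacheline_size, 2))    # 6
dir_bits = int(math.log(num_dirs, 2))                 # 2
numa_bit = block_size_bits + dir_bits - 1             # bit 7 (bits 6-7 select)
assert numa_bit == 7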
from common import Options from common import Simulation from common import CacheConfig from common import CpuConfig from common import MemConfig from common.Caches import * from common.cpu2000 import * parser = optparse.OptionParser() Options.addCommonOptions(parser) Options.addSEOptions(parser) if '--ruby' in sys.argv: Ruby.define_options(parser) else: fatal("This test is only for FPGA in Ruby. Please set --ruby.\n") (options, args) = parser.parse_args() if args: print "Error: script doesn't take any positional arguments" sys.exit(1) numThreads = 1 process1 = LiveProcess() process1.pid = 1100 process1.cmd = ['tests/test-progs/motion/motion-fpga'] (CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options) CPUClass.numThreads = numThreads
def createAladdinDatapath(config, accel): memory_type = config.get(accel, 'memory_type').lower() # Accelerators need their own clock domain! cycleTime = config.getint(accel, "cycle_time") clock = "%1.3fGHz" % (1.0 / cycleTime) clk_domain = SrcClockDomain(clock=clock, voltage_domain=system.cpu_voltage_domain) # Set the globally required parameters. datapath = HybridDatapath( clk_domain=clk_domain, benchName=accel, # TODO: Ideally bench_name would change to output_prefix but that's # a pretty big breaking change. outputPrefix=config.get(accel, "bench_name"), traceFileName=config.get(accel, "trace_file_name"), configFileName=config.get(accel, "config_file_name"), acceleratorName="%s_datapath" % accel, acceleratorId=config.getint(accel, "accelerator_id"), cycleTime=cycleTime, useDb=config.getboolean(accel, "use_db"), experimentName=config.get(accel, "experiment_name"), enableStatsDump=options.enable_stats_dump_and_resume) datapath.cacheLineFlushLatency = config.getint(accel, "cacheline_flush_latency") datapath.cacheLineInvalidateLatency = config.getint( accel, "cacheline_invalidate_latency") datapath.dmaSetupOverhead = config.getint(accel, "dma_setup_overhead") datapath.maxDmaRequests = config.getint(accel, "max_dma_requests") datapath.numDmaChannels = config.getint(accel, "num_dma_channels") datapath.dmaChunkSize = config.getint(accel, "dma_chunk_size") datapath.pipelinedDma = config.getboolean(accel, "pipelined_dma") datapath.ignoreCacheFlush = config.getboolean(accel, "ignore_cache_flush") datapath.invalidateOnDmaStore = config.getboolean( accel, "invalidate_on_dma_store") datapath.recordMemoryTrace = config.getboolean(accel, "record_memory_trace") datapath.enableAcp = config.getboolean(accel, "enable_acp") datapath.useAcpCache = True datapath.acpCacheSize = config.get(accel, "acp_cache_size") datapath.acpCacheLatency = config.getint(accel, "acp_cache_latency") datapath.acpCacheMSHRs = config.getint(accel, "acp_cache_mshrs") datapath.useAladdinDebugger = options.aladdin_debugger if memory_type == "cache": datapath.cacheSize = config.get(accel, "cache_size") datapath.cacheBandwidth = config.get(accel, "cache_bandwidth") datapath.cacheQueueSize = config.get(accel, "cache_queue_size") datapath.cacheAssoc = config.getint(accel, "cache_assoc") datapath.cacheHitLatency = config.getint(accel, "cache_hit_latency") datapath.cacheLineSize = options.cacheline_size datapath.cactiCacheConfig = config.get(accel, "cacti_cache_config") datapath.tlbEntries = config.getint(accel, "tlb_entries") datapath.tlbAssoc = config.getint(accel, "tlb_assoc") datapath.tlbHitLatency = config.getint(accel, "tlb_hit_latency") datapath.tlbMissLatency = config.getint(accel, "tlb_miss_latency") datapath.tlbCactiConfig = config.get(accel, "cacti_tlb_config") datapath.tlbPageBytes = config.getint(accel, "tlb_page_size") datapath.numOutStandingWalks = config.getint( accel, "tlb_max_outstanding_walks") datapath.tlbBandwidth = config.getint(accel, "tlb_bandwidth") elif memory_type == "spad" and options.ruby: # If the memory_type is spad, Aladdin will initialize a 2-way cache # for every datapath, although this cache will not be used in # simulation. Ruby doesn't support direct-mapped caches, so set the # assoc to 2. datapath.cacheAssoc = 2 if (memory_type != "cache" and memory_type != "spad"): fatal("Aladdin configuration file specified invalid memory type %s " "for accelerator %s." % (memory_type, accel)) setattr(system, datapath.acceleratorName, datapath)
# Check for timing mode because ruby does not support atomic accesses if not (options.cpu_type == "detailed" or options.cpu_type == "timing"): print >> sys.stderr, "Ruby requires TimingSimpleCPU or O3CPU!!" sys.exit(1) (CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options) TestMemClass = Simulation.setMemClass(options) if buildEnv['TARGET_ISA'] == "alpha": system = makeLinuxAlphaRubySystem(test_mem_mode, bm[0]) elif buildEnv['TARGET_ISA'] == "x86": system = makeLinuxX86System(test_mem_mode, options.num_cpus, bm[0], True) Simulation.setWorkCountOptions(system, options) else: fatal("incapable of building non-alpha or non-x86 full system!") system.cache_line_size = options.cacheline_size # Create a top-level voltage domain and clock domain system.voltage_domain = VoltageDomain(voltage=options.sys_voltage) system.clk_domain = SrcClockDomain(clock=options.sys_clock, voltage_domain=system.voltage_domain) if options.kernel is not None: system.kernel = binary(options.kernel) if options.script is not None: system.readfile = options.script system.cpu = [CPUClass(cpu_id=i) for i in xrange(options.num_cpus)]
] if args.caches or args.l2cache: # By default the IOCache runs at the system clock system.iocache = IOCache(addr_ranges=system.mem_ranges) system.iocache.cpu_side = system.iobus.mem_side_ports system.iocache.mem_side = system.membus.cpu_side_ports elif not args.external_memory_system: system.iobridge = Bridge(delay='50ns', ranges=system.mem_ranges) system.iobridge.cpu_side_port = system.iobus.mem_side_ports system.iobridge.mem_side_port = system.membus.cpu_side_ports # Sanity check if args.simpoint_profile: if not ObjectList.is_noncaching_cpu(CPUClass): fatal("SimPoint generation should be done with atomic cpu") if np > 1: fatal("SimPoint generation not supported with more than one CPUs") for i in range(np): if args.simpoint_profile: system.cpu[i].addSimPointProbe(args.simpoint_interval) if args.checker: system.cpu[i].addCheckerCpu() if not ObjectList.is_kvm_cpu(CPUClass): if args.bp_type: bpClass = ObjectList.bp_list.get(args.bp_type) system.cpu[i].branchPred = bpClass() if args.indirect_bp_type: IndirectBPClass = ObjectList.indirect_bp_list.get( args.indirect_bp_type)
def create_system(options, full_system, system, dma_ports, ruby_system): if buildEnv['PROTOCOL'] != 'MESI_Two_Level': fatal("This script requires the MESI_Two_Level protocol to be built.") ruby_system.num_simics_net_ports = options.num_networkports ruby_system.num_accelerators = options.accelerators ruby_system.num_TDs = options.num_tds cpu_sequencers = [] # # The ruby network creation expects the list of nodes in the system to be # consistent with the NetDest list. Therefore the l1 controller nodes must be # listed before the directory nodes and directory nodes before dma nodes, etc. # netport_cntrl_nodes = [] l1_cntrl_nodes = [] l2_cntrl_nodes = [] dir_cntrl_nodes = [] dma_cntrl_nodes = [] # # Must create the individual controllers before the network to ensure the # controller constructors are called before the network constructor # l2_bits = int(math.log(options.num_l2caches, 2)) block_size_bits = int(math.log(options.cacheline_size, 2)) assert (options.num_networkports == options.num_l2caches) num_l1_cntrls = ( (options.accelerators + options.num_tds + options.num_networkports - 1) / options.num_networkports) * options.num_networkports print "num_l1_cntrls = %d" % num_l1_cntrls assert (num_l1_cntrls >= (options.accelerators + options.num_tds)) for i in xrange(options.num_networkports): # First create the Ruby objects associated with # the CPU and Accelerator signal communication netport_cntrl = gem5NetworkPortInterface_Controller( version=i, transitions_per_cycle=options.ports, ruby_system=ruby_system) exec("ruby_system.netport_cntrl%d = netport_cntrl" % i) netport_cntrl_nodes.append(netport_cntrl) # Connect the netport controller to the network netport_cntrl.messageOut = ruby_system.network.slave netport_cntrl.messageIn = ruby_system.network.master for i in xrange(num_l1_cntrls): # # First create the Ruby objects associated with this cpu # l1i_cache = L1Cache(size=options.l1i_size, assoc=options.l1i_assoc, start_index_bit=block_size_bits, is_icache=True) l1d_cache = L1Cache(size=options.l1d_size, assoc=options.l1d_assoc, start_index_bit=block_size_bits, is_icache=False) prefetcher = RubyPrefetcher.Prefetcher() l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache, L1Dcache=l1d_cache, l2_select_num_bits=l2_bits, l2_select_low_bit=block_size_bits, send_evictions=send_evicts(options), prefetcher=prefetcher, ruby_system=ruby_system, clk_domain=system.cpu[0].clk_domain, transitions_per_cycle=options.ports, enable_prefetch=False) cpu_seq = RubySequencer(version=i, icache=l1i_cache, dcache=l1d_cache, clk_domain=system.cpu[0].clk_domain, ruby_system=ruby_system) l1_cntrl.sequencer = cpu_seq exec("ruby_system.l1_cntrl%d = l1_cntrl" % i) # Add controllers and sequencers to the appropriate lists if len(cpu_sequencers) < options.num_cpus: cpu_sequencers.append(cpu_seq) l1_cntrl_nodes.append(l1_cntrl) # Connect the L1 controllers and the network l1_cntrl.requestFromL1Cache = ruby_system.network.slave l1_cntrl.responseFromL1Cache = ruby_system.network.slave l1_cntrl.unblockFromL1Cache = ruby_system.network.slave l1_cntrl.requestToL1Cache = ruby_system.network.master l1_cntrl.responseToL1Cache = ruby_system.network.master l2_index_start = block_size_bits + l2_bits for i in xrange(options.num_l2caches): # # First create the Ruby objects associated with this cpu # l2_cache = L2Cache(size=options.l2_size, assoc=options.l2_assoc, start_index_bit=l2_index_start) l2_cntrl = L2Cache_Controller(version=i, L2cache=l2_cache, transitions_per_cycle=options.ports, ruby_system=ruby_system) 
exec("ruby_system.l2_cntrl%d = l2_cntrl" % i) l2_cntrl_nodes.append(l2_cntrl) # Connect the L2 controllers and the network l2_cntrl.DirRequestFromL2Cache = ruby_system.network.slave l2_cntrl.L1RequestFromL2Cache = ruby_system.network.slave l2_cntrl.responseFromL2Cache = ruby_system.network.slave l2_cntrl.unblockToL2Cache = ruby_system.network.master l2_cntrl.L1RequestToL2Cache = ruby_system.network.master l2_cntrl.responseToL2Cache = ruby_system.network.master phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges)) assert (phys_mem_size % options.num_dirs == 0) mem_module_size = phys_mem_size / options.num_dirs # Run each of the ruby memory controllers at a ratio of the frequency of # the ruby system # clk_divider value is a fix to pass regression. ruby_system.memctrl_clk_domain = DerivedClockDomain( clk_domain=ruby_system.clk_domain, clk_divider=3) for i in xrange(options.num_dirs): dir_size = MemorySize('0B') dir_size.value = mem_module_size dir_cntrl = Directory_Controller(version=i, directory=RubyDirectoryMemory( version=i, size=dir_size), transitions_per_cycle=options.ports, ruby_system=ruby_system) exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) # Connect the directory controllers and the network dir_cntrl.requestToDir = ruby_system.network.master dir_cntrl.responseToDir = ruby_system.network.master dir_cntrl.responseFromDir = ruby_system.network.slave for i, dma_port in enumerate(dma_ports): # Create the Ruby objects associated with the dma controller dma_seq = DMASequencer(version=i, ruby_system=ruby_system, slave=dma_port) dma_cntrl = DMA_Controller(version=i, dma_sequencer=dma_seq, transitions_per_cycle=options.ports, ruby_system=ruby_system) exec("ruby_system.dma_cntrl%d = dma_cntrl" % i) dma_cntrl_nodes.append(dma_cntrl) # Connect the dma controller to the network dma_cntrl.responseFromDir = ruby_system.network.master dma_cntrl.requestToDir = ruby_system.network.slave all_cntrls = netport_cntrl_nodes + \ l1_cntrl_nodes + \ l2_cntrl_nodes + \ dir_cntrl_nodes + \ dma_cntrl_nodes # Create the io controller and the sequencer if full_system: io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system) ruby_system._io_port = io_seq io_controller = DMA_Controller(version=len(dma_ports), dma_sequencer=io_seq, ruby_system=ruby_system) ruby_system.io_controller = io_controller # Connect the dma controller to the network io_controller.responseFromDir = ruby_system.network.master io_controller.requestToDir = ruby_system.network.slave all_cntrls = all_cntrls + [io_controller] topology = create_topology(all_cntrls, options) return (cpu_sequencers, dir_cntrl_nodes, topology)
if buildEnv['TARGET_ISA'] == "alpha": test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0]) elif buildEnv['TARGET_ISA'] == "mips": test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0]) elif buildEnv['TARGET_ISA'] == "sparc": test_sys = makeSparcSystem(test_mem_mode, bm[0]) elif buildEnv['TARGET_ISA'] == "x86": test_sys = makeLinuxX86System(test_mem_mode, options.num_cpus, bm[0]) elif buildEnv['TARGET_ISA'] == "arm": test_sys = makeArmSystem(test_mem_mode, options.machine_type, bm[0], options.dtb_filename, bare_metal=options.bare_metal) else: fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA']) if options.kernel is not None: test_sys.kernel = binary(options.kernel) if options.script is not None: test_sys.readfile = options.script test_sys.init_param = options.init_param test_sys.cpu = [TestCPUClass(cpu_id=i) for i in xrange(np)] if options.caches or options.l2cache: test_sys.iocache = IOCache(clock='1GHz', addr_ranges=test_sys.mem_ranges) test_sys.iocache.cpu_side = test_sys.iobus.master test_sys.iocache.mem_side = test_sys.membus.slave
multiprocesses = [] numThreads = 1 if options.cmd: multiprocesses, numThreads = get_processes(options) else: print >> sys.stderr, "No workload specified. Exiting!\n" sys.exit(1) (CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options) CPUClass.numThreads = numThreads # Check -- do not allow SMT with multiple CPUs if options.smt and options.num_cpus > 1: fatal("You cannot use SMT with multiple CPUs!") np = options.num_cpus system = System(cpu=[CPUClass(cpu_id=i) for i in xrange(np)], mem_mode=test_mem_mode, mem_ranges=[AddrRange(options.mem_size)], cache_line_size=options.cacheline_size) hw3opts.set_config(system.cpu, options) if numThreads > 1: system.multi_thread = True # Create a top-level voltage domain system.voltage_domain = VoltageDomain(voltage=options.sys_voltage) # Create a source clock for the system and set the clock period
if '--ruby' in sys.argv: print("This script does not support Ruby configuration, mainly" " because Trace CPU has been tested only with classic memory system") sys.exit(1) (options, args) = parser.parse_args() if args: print("Error: script doesn't take any positional arguments") sys.exit(1) numThreads = 1 if options.cpu_type != "TraceCPU": fatal("This is a script for elastic trace replay simulation, use "\ "--cpu-type=TraceCPU\n") if options.num_cpus > 1: fatal("This script does not support multi-processor trace replay.\n") # In this case FutureClass will be None as there is not fast forwarding or # switching (CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options) CPUClass.numThreads = numThreads system = System(cpu=CPUClass(cpu_id=0), mem_mode=test_mem_mode, mem_ranges=[AddrRange(options.mem_size)], cache_line_size=options.cacheline_size) # Create a top-level voltage domain
def create_system(options, full_system, system, dma_ports, ruby_system): if buildEnv['PROTOCOL'] != 'MESI_Two_Level': fatal("This script requires the MESI_Two_Level protocol to be built.") ruby_system.num_simics_net_ports = options.num_networkports ruby_system.num_accelerators = options.accelerators ruby_system.num_TDs = options.num_tds cpu_sequencers = [] # # The ruby network creation expects the list of nodes in the system to be # consistent with the NetDest list. Therefore the l1 controller nodes must be # listed before the directory nodes and directory nodes before dma nodes, etc. # netport_cntrl_nodes = [] l1_cntrl_nodes = [] l2_cntrl_nodes = [] dir_cntrl_nodes = [] dma_cntrl_nodes = [] # # Must create the individual controllers before the network to ensure the # controller constructors are called before the network constructor # l2_bits = int(math.log(options.num_l2caches, 2)) block_size_bits = int(math.log(options.cacheline_size, 2)) assert(options.num_networkports == options.num_l2caches) num_l1_cntrls = ((options.accelerators + options.num_tds + options.num_networkports - 1)/options.num_networkports) * options.num_networkports print "num_l1_cntrls = %d" % num_l1_cntrls assert(num_l1_cntrls >= (options.accelerators + options.num_tds)) for i in xrange(options.num_networkports): # First create the Ruby objects associated with # the CPU and Accelerator signal communication netport_cntrl = gem5NetworkPortInterface_Controller(version = i, transitions_per_cycle=options.ports, ruby_system = ruby_system) exec("ruby_system.netport_cntrl%d = netport_cntrl" % i) netport_cntrl_nodes.append(netport_cntrl) # Connect the netport controller to the network netport_cntrl.messageOut = ruby_system.network.slave netport_cntrl.messageIn = ruby_system.network.master for i in xrange(num_l1_cntrls): # # First create the Ruby objects associated with this cpu # l1i_cache = L1Cache(size = options.l1i_size, assoc = options.l1i_assoc, start_index_bit = block_size_bits, is_icache = True) l1d_cache = L1Cache(size = options.l1d_size, assoc = options.l1d_assoc, start_index_bit = block_size_bits, is_icache = False) prefetcher = RubyPrefetcher.Prefetcher() l1_cntrl = L1Cache_Controller(version = i, L1Icache = l1i_cache, L1Dcache = l1d_cache, l2_select_num_bits = l2_bits, l2_select_low_bit = block_size_bits, send_evictions = send_evicts(options), prefetcher = prefetcher, ruby_system = ruby_system, clk_domain=system.cpu[0].clk_domain, transitions_per_cycle=options.ports, enable_prefetch = False) cpu_seq = RubySequencer(version = i, icache = l1i_cache, dcache = l1d_cache, clk_domain=system.cpu[0].clk_domain, ruby_system = ruby_system) l1_cntrl.sequencer = cpu_seq exec("ruby_system.l1_cntrl%d = l1_cntrl" % i) # Add controllers and sequencers to the appropriate lists if len(cpu_sequencers) < options.num_cpus : cpu_sequencers.append(cpu_seq) l1_cntrl_nodes.append(l1_cntrl) # Connect the L1 controllers and the network l1_cntrl.requestFromL1Cache = ruby_system.network.slave l1_cntrl.responseFromL1Cache = ruby_system.network.slave l1_cntrl.unblockFromL1Cache = ruby_system.network.slave l1_cntrl.requestToL1Cache = ruby_system.network.master l1_cntrl.responseToL1Cache = ruby_system.network.master l2_index_start = block_size_bits + l2_bits for i in xrange(options.num_l2caches): # # First create the Ruby objects associated with this cpu # l2_cache = L2Cache(size = options.l2_size, assoc = options.l2_assoc, start_index_bit = l2_index_start) l2_cntrl = L2Cache_Controller(version = i, L2cache = l2_cache, 
transitions_per_cycle=options.ports, ruby_system = ruby_system) exec("ruby_system.l2_cntrl%d = l2_cntrl" % i) l2_cntrl_nodes.append(l2_cntrl) # Connect the L2 controllers and the network l2_cntrl.DirRequestFromL2Cache = ruby_system.network.slave l2_cntrl.L1RequestFromL2Cache = ruby_system.network.slave l2_cntrl.responseFromL2Cache = ruby_system.network.slave l2_cntrl.unblockToL2Cache = ruby_system.network.master l2_cntrl.L1RequestToL2Cache = ruby_system.network.master l2_cntrl.responseToL2Cache = ruby_system.network.master phys_mem_size = sum(map(lambda r: r.size(), system.mem_ranges)) assert(phys_mem_size % options.num_dirs == 0) mem_module_size = phys_mem_size / options.num_dirs # Run each of the ruby memory controllers at a ratio of the frequency of # the ruby system # clk_divider value is a fix to pass regression. ruby_system.memctrl_clk_domain = DerivedClockDomain( clk_domain=ruby_system.clk_domain, clk_divider=3) for i in xrange(options.num_dirs): dir_size = MemorySize('0B') dir_size.value = mem_module_size dir_cntrl = Directory_Controller(version = i, directory = RubyDirectoryMemory( version = i, size = dir_size), transitions_per_cycle = options.ports, ruby_system = ruby_system) exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) # Connect the directory controllers and the network dir_cntrl.requestToDir = ruby_system.network.master dir_cntrl.responseToDir = ruby_system.network.master dir_cntrl.responseFromDir = ruby_system.network.slave for i, dma_port in enumerate(dma_ports): # Create the Ruby objects associated with the dma controller dma_seq = DMASequencer(version = i, ruby_system = ruby_system, slave = dma_port) dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq, transitions_per_cycle = options.ports, ruby_system = ruby_system) exec("ruby_system.dma_cntrl%d = dma_cntrl" % i) dma_cntrl_nodes.append(dma_cntrl) # Connect the dma controller to the network dma_cntrl.responseFromDir = ruby_system.network.master dma_cntrl.requestToDir = ruby_system.network.slave all_cntrls = netport_cntrl_nodes + \ l1_cntrl_nodes + \ l2_cntrl_nodes + \ dir_cntrl_nodes + \ dma_cntrl_nodes # Create the io controller and the sequencer if full_system: io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system) ruby_system._io_port = io_seq io_controller = DMA_Controller(version = len(dma_ports), dma_sequencer = io_seq, ruby_system = ruby_system) ruby_system.io_controller = io_controller # Connect the dma controller to the network io_controller.responseFromDir = ruby_system.network.master io_controller.requestToDir = ruby_system.network.slave all_cntrls = all_cntrls + [io_controller] topology = create_topology(all_cntrls, options) return (cpu_sequencers, dir_cntrl_nodes, topology)
    sys.exit(1)
elif options.cmd:
    multiprocesses, numThreads = get_processes(options)
else:
    print >> sys.stderr, "No workload specified. Exiting!\n"
    sys.exit(1)

#JON_print_members(options, "OPTIONS", 156)

(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options)
CPUClass.numThreads = numThreads

# Check -- do not allow SMT with multiple CPUs
if options.smt and options.num_cpus > 1:
    fatal("You cannot use SMT with multiple CPUs!")

np = options.num_cpus
system = System(cpu = [CPUClass(cpu_id=i) for i in xrange(np)],
                mem_mode = test_mem_mode,
                mem_ranges = [AddrRange(options.mem_size)],
                cache_line_size = options.cacheline_size)

# Create a top-level voltage domain
system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)

# Create a source clock for the system and set the clock period
system.clk_domain = SrcClockDomain(clock = options.sys_clock,
                                   voltage_domain = system.voltage_domain)

# Create a CPU voltage domain
def __init__(self):
    if buildEnv['PROTOCOL'] != 'MI_example':
        fatal("This system assumes MI_example!")

    super(MIExampleSystem, self).__init__()
def build_test_system(np): cmdline = cmd_line_template() if buildEnv['TARGET_ISA'] == "alpha": test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0], options.ruby, cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "mips": test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0], cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "sparc": test_sys = makeSparcSystem(test_mem_mode, bm[0], cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "x86": test_sys = makeLinuxX86System(test_mem_mode, options.num_cpus, bm[0], options.ruby, cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "arm": test_sys = makeArmSystem( test_mem_mode, options.machine_type, options.num_cpus, bm[0], options.dtb_filename, bare_metal=options.bare_metal, cmdline=cmdline, external_memory=options.external_memory_system) if options.enable_context_switch_stats_dump: test_sys.enable_context_switch_stats_dump = True else: fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA']) # Set the cache line size for the entire system test_sys.cache_line_size = options.cacheline_size # Create a top-level voltage domain test_sys.voltage_domain = VoltageDomain(voltage=options.sys_voltage) # Create a source clock for the system and set the clock period test_sys.clk_domain = SrcClockDomain( clock=options.sys_clock, voltage_domain=test_sys.voltage_domain) # Create a CPU voltage domain test_sys.cpu_voltage_domain = VoltageDomain() # Create a source clock for the CPUs and set the clock period test_sys.cpu_clk_domain = SrcClockDomain( clock=options.cpu_clock, voltage_domain=test_sys.cpu_voltage_domain) if options.kernel is not None: test_sys.kernel = binary(options.kernel) if options.script is not None: test_sys.readfile = options.script if options.lpae: test_sys.have_lpae = True if options.virtualisation: test_sys.have_virtualization = True test_sys.init_param = options.init_param # For now, assign all the CPUs to the same clock domain test_sys.cpu = [ TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i) for i in xrange(np) ] if is_kvm_cpu(TestCPUClass) or is_kvm_cpu(FutureClass): test_sys.vm = KvmVM() if options.ruby: # Check for timing mode because ruby does not support atomic accesses if not (options.cpu_type == "detailed" or options.cpu_type == "timing"): print >> sys.stderr, "Ruby requires TimingSimpleCPU or O3CPU!!" sys.exit(1) Ruby.create_system(options, True, test_sys, test_sys.iobus, test_sys._dma_ports) # Create a seperate clock domain for Ruby test_sys.ruby.clk_domain = SrcClockDomain( clock=options.ruby_clock, voltage_domain=test_sys.voltage_domain) # Connect the ruby io port to the PIO bus, # assuming that there is just one such port. 
test_sys.iobus.master = test_sys.ruby._io_port.slave for (i, cpu) in enumerate(test_sys.cpu): # # Tie the cpu ports to the correct ruby system ports # cpu.clk_domain = test_sys.cpu_clk_domain cpu.createThreads() cpu.createInterruptController() cpu.icache_port = test_sys.ruby._cpu_ports[i].slave cpu.dcache_port = test_sys.ruby._cpu_ports[i].slave if buildEnv['TARGET_ISA'] == "x86": cpu.itb.walker.port = test_sys.ruby._cpu_ports[i].slave cpu.dtb.walker.port = test_sys.ruby._cpu_ports[i].slave cpu.interrupts[0].pio = test_sys.ruby._cpu_ports[i].master cpu.interrupts[0].int_master = test_sys.ruby._cpu_ports[ i].slave cpu.interrupts[0].int_slave = test_sys.ruby._cpu_ports[ i].master else: if options.caches or options.l2cache: # By default the IOCache runs at the system clock test_sys.iocache = IOCache(addr_ranges=test_sys.mem_ranges) test_sys.iocache.cpu_side = test_sys.iobus.master test_sys.iocache.mem_side = test_sys.membus.slave elif not options.external_memory_system: test_sys.iobridge = Bridge(delay='50ns', ranges=test_sys.mem_ranges) test_sys.iobridge.slave = test_sys.iobus.master test_sys.iobridge.master = test_sys.membus.slave # Sanity check if options.fastmem: if TestCPUClass != AtomicSimpleCPU: fatal("Fastmem can only be used with atomic CPU!") if (options.caches or options.l2cache): fatal("You cannot use fastmem in combination with caches!") if options.simpoint_profile: if not options.fastmem: # Atomic CPU checked with fastmem option already fatal( "SimPoint generation should be done with atomic cpu and fastmem" ) if np > 1: fatal( "SimPoint generation not supported with more than one CPUs" ) for i in xrange(np): if options.fastmem: test_sys.cpu[i].fastmem = True if options.simpoint_profile: test_sys.cpu[i].addSimPointProbe(options.simpoint_interval) if options.checker: test_sys.cpu[i].addCheckerCpu() test_sys.cpu[i].createThreads() # If elastic tracing is enabled when not restoring from checkpoint and # when not fast forwarding using the atomic cpu, then check that the # TestCPUClass is DerivO3CPU or inherits from DerivO3CPU. If the check # passes then attach the elastic trace probe. # If restoring from checkpoint or fast forwarding, the code that does this for # FutureCPUClass is in the Simulation module. If the check passes then the # elastic trace probe is attached to the switch CPUs. if options.elastic_trace_en and options.checkpoint_restore == None and \ not options.fast_forward: CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, options) CacheConfig.config_cache(options, test_sys) MemConfig.config_mem(options, test_sys) return test_sys
def config_mem(options, system): """ Create the memory controllers based on the options and attach them. If requested, we make a multi-channel configuration of the selected memory controller class by creating multiple instances of the specific class. The individual controllers have their parameters set such that the address range is interleaved between them. """ nbr_mem_ctrls = options.mem_channels import math from m5.util import fatal intlv_bits = int(math.log(nbr_mem_ctrls, 2)) if 2 ** intlv_bits != nbr_mem_ctrls: fatal("Number of memory channels must be a power of 2") cls = get(options.mem_type) mem_ctrls = [] # The default behaviour is to interleave on cache line granularity cache_line_bit = int(math.log(system.cache_line_size.value, 2)) - 1 intlv_low_bit = cache_line_bit # For every range (most systems will only have one), create an # array of controllers and set their parameters to match their # address mapping in the case of a DRAM for r in system.mem_ranges: for i in xrange(nbr_mem_ctrls): # Create an instance so we can figure out the address # mapping and row-buffer size ctrl = cls() # Only do this for DRAMs if issubclass(cls, m5.objects.DRAMCtrl): # Inform each controller how many channels to account # for ctrl.channels = nbr_mem_ctrls # If the channel bits are appearing after the column # bits, we need to add the appropriate number of bits # for the row buffer size if ctrl.addr_mapping.value == 'RoRaBaChCo': # This computation only really needs to happen # once, but as we rely on having an instance we # end up having to repeat it for each and every # one rowbuffer_size = ctrl.device_rowbuffer_size.value * \ ctrl.devices_per_rank.value intlv_low_bit = int(math.log(rowbuffer_size, 2)) - 1 # We got all we need to configure the appropriate address # range ctrl.range = m5.objects.AddrRange(r.start, size = r.size(), intlvHighBit = \ intlv_low_bit + intlv_bits, intlvBits = intlv_bits, intlvMatch = i) mem_ctrls.append(ctrl) system.mem_ctrls = mem_ctrls # Connect the controllers to the membus for i in xrange(len(system.mem_ctrls)): system.mem_ctrls[i].port = system.membus.master
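# A minimal standalone sketch of how the RoRaBaChCo interleaving bits above
# work out. The device parameters here (1 kB device row buffer, 8 devices per
# rank, 4 channels) are assumptions, not taken from any specific DRAM config.
import math

device_rowbuffer_size = 1024                   # assumed: 1 kB per device
devices_per_rank = 8                           # assumed
nbr_mem_ctrls = 4                              # assumed: 4 channels
intlv_bits = int(math.log(nbr_mem_ctrls, 2))               # 2 channel-select bits
rowbuffer_size = device_rowbuffer_size * devices_per_rank  # 8 kB rank row buffer
intlv_low_bit = int(math.log(rowbuffer_size, 2)) - 1       # bit 12
intlv_high_bit = intlv_low_bit + intlv_bits                # bit 14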
size=options.mem_size), shared=True, shmkey=options.shm_key, needdump=dumpddr), ddrmem2=SimpleMemory(range=AddrRange(0x80000000, 0xBFFFFFFF), needdump=dumpddr), ddrmem3=SimpleMemory(range=AddrRange(0xC0000000, 0xFFFFFFFF), needdump=dumpddr), shmem=SimpleMemory(range=AddrRange(0x40400000, 0x407FFFFF), shared=True, shmkey=options.shm_key, needdump=dumpshare)) # Sanity check if options.fastmem and (options.caches or options.l2cache): fatal("You cannot use fastmem in combination with caches!") for i in xrange(np): system.cpu[i].workload = processes[i] if options.fastmem: system.cpu[i].fastmem = True if options.checker: system.cpu[i].addCheckerCpu() #if options.ruby: # if not (options.cpu_type == "detailed" or options.cpu_type == "timing"): # print >> sys.stderr, "Ruby requires TimingSimpleCPU or O3CPU!!" # sys.exit(1)
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Ali Saidi

import optparse
import os
import sys

import m5
from m5.defines import buildEnv
from m5.objects import *
from m5.util import addToPath, fatal

if not buildEnv['FULL_SYSTEM']:
    fatal("This script requires full-system mode (*_FS).")

addToPath('../common')

from FSConfig import *
from SysPaths import *
from Benchmarks import *
import Simulation
import CacheConfig
from Caches import *

# Get paths we might need.  It's expected this file is in m5/configs/example.
config_path = os.path.dirname(os.path.abspath(__file__))
config_root = os.path.dirname(config_path)

parser = optparse.OptionParser()
# print("Unable to find workload for %s: %s" % # (buildEnv['TARGET_ISA'], app), # file=sys.stderr) # sys.exit(1) #elif options.cmd: # multiprocesses, numThreads = get_processes(options) #else: # print("No workload specified. Exiting!\n", file=sys.stderr) # sys.exit(1) (CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options) CPUClass.numThreads = numThreads # Check -- do not allow SMT with multiple CPUs if options.smt and options.num_cpus > 1: fatal("You cannot use SMT with multiple CPUs!") np = options.num_cpus system = System(cpu=[CPUClass(cpu_id=i) for i in range(np)], mem_mode=test_mem_mode, mem_ranges=[AddrRange(options.mem_size)], cache_line_size=options.cacheline_size) if numThreads > 1: system.multi_thread = True # Create a top-level voltage domain system.voltage_domain = VoltageDomain(voltage=options.sys_voltage) # Create a source clock for the system and set the clock period system.clk_domain = SrcClockDomain(clock=options.sys_clock,
# multiprocesses, numThreads = get_processes(options) # elif options.benchmark: # for i in xrange(options.num_cpus): # multiprocesses.append(process) # else: # print("No workload specified. Exiting!\n", file=sys.stderr) # sys.exit(1) (CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options) CPUClass.numThreads = numThreads # Check -- do not allow SMT with multiple CPUs if options.smt and options.num_cpus > 1: fatal("You cannot use SMT with multiple CPUs!") np = options.num_cpus system = System(cpu=[CPUClass(cpu_id=i) for i in range(np)], mem_mode=test_mem_mode, mem_ranges=[AddrRange(options.mem_size)], cache_line_size=options.cacheline_size) if numThreads > 1: system.multi_thread = True # Create a top-level voltage domain system.voltage_domain = VoltageDomain(voltage=options.sys_voltage) # Create a source clock for the system and set the clock period system.clk_domain = SrcClockDomain(clock=options.sys_clock,
else: if options.caches or options.l2cache: # By default the IOCache runs at the system clock test_sys.iocache = IOCache(addr_ranges = test_sys.mem_ranges) test_sys.iocache.cpu_side = test_sys.iobus.master test_sys.iocache.mem_side = test_sys.membus.slave else: test_sys.iobridge = Bridge(delay='50ns', ranges = test_sys.mem_ranges) test_sys.iobridge.slave = test_sys.iobus.master test_sys.iobridge.master = test_sys.membus.slave # Sanity check if options.fastmem: if TestCPUClass != AtomicSimpleCPU: fatal("Fastmem can only be used with atomic CPU!") if (options.caches or options.l2cache): fatal("You cannot use fastmem in combination with caches!") for i in xrange(np): if options.fastmem: test_sys.cpu[i].fastmem = True if options.checker: test_sys.cpu[i].addCheckerCpu() test_sys.cpu[i].createThreads() CacheConfig.config_cache(options, test_sys) MemConfig.config_mem(options, test_sys) return test_sys
def __init__(self):
    if buildEnv['PROTOCOL'] != 'MSI':
        fatal("This system assumes MSI from learning gem5!")

    super(TestCacheSystem, self).__init__()
def config_mem(options, system): """ Create the memory controllers based on the options and attach them. If requested, we make a multi-channel configuration of the selected memory controller class by creating multiple instances of the specific class. The individual controllers have their parameters set such that the address range is interleaved between them. """ # Mandatory options opt_mem_type = options.mem_type opt_mem_channels = options.mem_channels # Optional options opt_tlm_memory = getattr(options, "tlm_memory", None) opt_external_memory_system = getattr(options, "external_memory_system", None) opt_elastic_trace_en = getattr(options, "elastic_trace_en", False) opt_mem_ranks = getattr(options, "mem_ranks", None) opt_dram_powerdown = getattr(options, "enable_dram_powerdown", None) if opt_mem_type == "HMC_2500_1x32": HMChost = HMC.config_hmc_host_ctrl(options, system) HMC.config_hmc_dev(options, system, HMChost.hmc_host) subsystem = system.hmc_dev xbar = system.hmc_dev.xbar else: subsystem = system xbar = system.membus if opt_tlm_memory: system.external_memory = m5.objects.ExternalSlave( port_type="tlm_slave", port_data=opt_tlm_memory, port=system.membus.master, addr_ranges=system.mem_ranges) system.kernel_addr_check = False return if opt_external_memory_system: subsystem.external_memory = m5.objects.ExternalSlave( port_type=opt_external_memory_system, port_data="init_mem0", port=xbar.master, addr_ranges=system.mem_ranges) subsystem.kernel_addr_check = False return nbr_mem_ctrls = opt_mem_channels import math from m5.util import fatal intlv_bits = int(math.log(nbr_mem_ctrls, 2)) if 2**intlv_bits != nbr_mem_ctrls: fatal("Number of memory channels must be a power of 2") cls = ObjectList.mem_list.get(opt_mem_type) mem_ctrls = [] if opt_elastic_trace_en and not issubclass(cls, m5.objects.SimpleMemory): fatal("When elastic trace is enabled, configure mem-type as " "simple-mem.") # The default behaviour is to interleave memory channels on 128 # byte granularity, or cache line granularity if larger than 128 # byte. This value is based on the locality seen across a large # range of workloads. intlv_size = max(128, system.cache_line_size.value) # For every range (most systems will only have one), create an # array of controllers and set their parameters to match their # address mapping in the case of a DRAM #for r in [m5.objects.AddrRange(0x00000000, size='32MB')]: for r in system.mem_ranges: for i in range(nbr_mem_ctrls): mem_ctrl = create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, intlv_size) # Set the number of ranks based on the command-line # options if it was explicitly set if issubclass(cls, m5.objects.DRAMCtrl) and opt_mem_ranks: mem_ctrl.ranks_per_channel = opt_mem_ranks # Enable low-power DRAM states if option is set if issubclass(cls, m5.objects.DRAMCtrl): mem_ctrl.enable_dram_powerdown = opt_dram_powerdown if opt_elastic_trace_en: mem_ctrl.latency = '1ns' print("For elastic trace, over-riding Simple Memory " "latency to 1ns.") mem_ctrls.append(mem_ctrl) subsystem.mem_ctrls = mem_ctrls # Connect the controllers to the membus for i in range(len(subsystem.mem_ctrls)): if opt_mem_type == "HMC_2500_1x32": subsystem.mem_ctrls[i].port = xbar[i / 4].master # Set memory device size. There is an independent controller for # each vault. All vaults are same size. 
subsystem.mem_ctrls[i].device_size = options.hmc_dev_vault_size else: if options.record_dram_traffic: monitor = CommMonitor() monitor.trace = MemTraceProbe(trace_file="dram_%d.trc.gz" % i) xbar.master = monitor.slave monitor.master = subsystem.mem_ctrls[i].port monitor_name = "dram_%d_monitor" % i setattr(subsystem, monitor_name, monitor) else: subsystem.mem_ctrls[i].port = xbar.master
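# A minimal standalone sketch of the HMC hook-up above: vault controllers are
# attached to the device-internal crossbars four at a time, so controller i
# lands on crossbar i // 4. The 16-vault / 4-crossbar figures are assumptions
# used only for illustration.
num_vaults = 16
vault_to_xbar = dict((i, i // 4) for i in range(num_vaults))
# vaults 0-3 -> xbar 0, vaults 4-7 -> xbar 1, and so on
assert vault_to_xbar[5] == 1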
elif len(bm) == 1:
    root = Root(full_system=True, system=test_sys)
else:
    print("Error: I don't know how to create more than two systems.")
    sys.exit(1)

if options.timesync:
    root.time_sync_enable = True

if options.frame_capture:
    VncServer.frame_capture = True

if buildEnv['TARGET_ISA'] == "arm" and options.generate_dtb:
    # Sanity checks
    if options.dtb_filename:
        fatal("--generate-dtb and --dtb-filename cannot be specified at "
              "the same time.")

    if options.machine_type not in ["VExpress_GEM5", "VExpress_GEM5_V1"]:
        warn("Can only correctly generate a dtb for VExpress_GEM5_V1 "
             "platforms, unless custom hardware models have been equipped "
             "with generation functionality.")

    # Generate a Device Tree
    def create_dtb_for_system(system, filename):
        state = FdtState(addr_cells=2, size_cells=2, cpu_cells=1)
        rootNode = system.generateDeviceTree(state)

        fdt = Fdt()
        fdt.add_rootnode(rootNode)
        dtb_filename = os.path.join(m5.options.outdir, filename)
        return fdt.writeDtbFile(dtb_filename)
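# A minimal usage sketch for the helper above; the 'dtb_filename' attribute
# and the output file name are assumptions for illustration, not taken from
# this script.
def attach_generated_dtb(system):
    # write the generated device tree into the output directory and point the
    # (hypothetical) dtb_filename parameter of the system at the written file
    system.dtb_filename = create_dtb_for_system(system, 'system.dtb')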
    atomic = True
    CPUClass = None
    test_mem_mode = 'atomic'
    if not atomic:
        test_mem_mode = 'timing'

    return (TmpClass, test_mem_mode, CPUClass)

#######################################################################
#
# Check that we are running on a full-system ARM simulator

if not buildEnv['TARGET_ISA'] == "arm":
    fatal("Expected TARGET_ISA == arm")

#######################################################################
#
# Set up basic configuration options
# The Panda board runs a Cortex-A9 core revision r2p10
# The cache controller is a PL310
# The CPU is out-of-order

parser = optparse.OptionParser()
parser.add_option("--kernel", action="store", type="string")
parser.add_option("--bootloader", action="store", type="string")
parser.add_option("--ramdisk", action="store", type="string")

# The Panda board has two CPUs
parser.add_option("-n", "--num_cpus", type="int", default=2)
if '--ruby' in sys.argv:
    print("This script does not support Ruby configuration, mainly"
          " because the Trace CPU has been tested only with the classic"
          " memory system")
    sys.exit(1)

(options, args) = parser.parse_args()

if args:
    print("Error: script doesn't take any positional arguments")
    sys.exit(1)

numThreads = 1

if options.cpu_type != "TraceCPU":
    fatal("This is a script for elastic trace replay simulation, use "
          "--cpu-type=TraceCPU\n")

if options.num_cpus > 1:
    fatal("This script does not support multi-processor trace replay.\n")

# In this case FutureClass will be None as there is no fast forwarding or
# switching
(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options)
CPUClass.numThreads = numThreads

system = System(cpu = CPUClass(cpu_id=0),
                mem_mode = test_mem_mode,
                mem_ranges = [AddrRange(options.mem_size)],
                cache_line_size = options.cacheline_size)

# Create a top-level voltage domain
def build_test_system(np): cmdline = cmd_line_template() if buildEnv['TARGET_ISA'] == "alpha": test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0], options.ruby, cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "mips": test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0], cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "sparc": test_sys = makeSparcSystem(test_mem_mode, bm[0], cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "x86": test_sys = makeLinuxX86System(test_mem_mode, options.num_cpus, bm[0], options.ruby, cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "arm": test_sys = makeArmSystem(test_mem_mode, options.machine_type, options.num_cpus, bm[0], options.dtb_filename, bare_metal=options.bare_metal, cmdline=cmdline, external_memory=options.external_memory_system) if options.enable_context_switch_stats_dump: test_sys.enable_context_switch_stats_dump = True else: fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA']) # Set the cache line size for the entire system test_sys.cache_line_size = options.cacheline_size # Create a top-level voltage domain test_sys.voltage_domain = VoltageDomain(voltage = options.sys_voltage) # Create a source clock for the system and set the clock period test_sys.clk_domain = SrcClockDomain(clock = options.sys_clock, voltage_domain = test_sys.voltage_domain) # Create a CPU voltage domain test_sys.cpu_voltage_domain = VoltageDomain() # Create a source clock for the CPUs and set the clock period test_sys.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock, voltage_domain = test_sys.cpu_voltage_domain) if options.kernel is not None: test_sys.kernel = binary(options.kernel) if options.script is not None: test_sys.readfile = options.script if options.lpae: test_sys.have_lpae = True if options.virtualisation: test_sys.have_virtualization = True test_sys.init_param = options.init_param # For now, assign all the CPUs to the same clock domain test_sys.cpu = [TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i, function_trace=options.enable_trace) for i in xrange(np)] if is_kvm_cpu(TestCPUClass) or is_kvm_cpu(FutureClass): test_sys.vm = KvmVM() if options.ruby: # Check for timing mode because ruby does not support atomic accesses if not (options.cpu_type == "detailed" or options.cpu_type == "timing"): print >> sys.stderr, "Ruby requires TimingSimpleCPU or O3CPU!!" sys.exit(1) Ruby.create_system(options, True, test_sys, test_sys.iobus, test_sys._dma_ports) # Create a seperate clock domain for Ruby test_sys.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock, voltage_domain = test_sys.voltage_domain) # Connect the ruby io port to the PIO bus, # assuming that there is just one such port. 
test_sys.iobus.master = test_sys.ruby._io_port.slave for (i, cpu) in enumerate(test_sys.cpu): # # Tie the cpu ports to the correct ruby system ports # cpu.clk_domain = test_sys.cpu_clk_domain cpu.createThreads() cpu.createInterruptController() cpu.icache_port = test_sys.ruby._cpu_ports[i].slave cpu.dcache_port = test_sys.ruby._cpu_ports[i].slave if buildEnv['TARGET_ISA'] == "x86": cpu.itb.walker.port = test_sys.ruby._cpu_ports[i].slave cpu.dtb.walker.port = test_sys.ruby._cpu_ports[i].slave cpu.interrupts[0].pio = test_sys.ruby._cpu_ports[i].master cpu.interrupts[0].int_master = test_sys.ruby._cpu_ports[i].slave cpu.interrupts[0].int_slave = test_sys.ruby._cpu_ports[i].master else: if options.caches or options.l2cache: # By default the IOCache runs at the system clock test_sys.iocache = IOCache(addr_ranges = test_sys.mem_ranges) test_sys.iocache.cpu_side = test_sys.iobus.master test_sys.iocache.mem_side = test_sys.membus.slave elif not options.external_memory_system: test_sys.iobridge = Bridge(delay='50ns', ranges = test_sys.mem_ranges) test_sys.iobridge.slave = test_sys.iobus.master test_sys.iobridge.master = test_sys.membus.slave # Sanity check if options.fastmem: if TestCPUClass != AtomicSimpleCPU: fatal("Fastmem can only be used with atomic CPU!") if (options.caches or options.l2cache): fatal("You cannot use fastmem in combination with caches!") if options.simpoint_profile: if not options.fastmem: # Atomic CPU checked with fastmem option already fatal("SimPoint generation should be done with atomic cpu and fastmem") if np > 1: fatal("SimPoint generation not supported with more than one CPUs") for i in xrange(np): if options.fastmem: test_sys.cpu[i].fastmem = True if options.simpoint_profile: test_sys.cpu[i].addSimPointProbe(options.simpoint_interval) if options.checker: test_sys.cpu[i].addCheckerCpu() test_sys.cpu[i].createThreads() # If elastic tracing is enabled when not restoring from checkpoint and # when not fast forwarding using the atomic cpu, then check that the # TestCPUClass is DerivO3CPU or inherits from DerivO3CPU. If the check # passes then attach the elastic trace probe. # If restoring from checkpoint or fast forwarding, the code that does this for # FutureCPUClass is in the Simulation module. If the check passes then the # elastic trace probe is attached to the switch CPUs. if options.elastic_trace_en and options.checkpoint_restore == None and \ not options.fast_forward: CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, options) CacheConfig.config_cache(options, test_sys) MemConfig.config_mem(options, test_sys) return test_sys
# # "m5 test.py" import os import optparse import sys from os.path import join as joinpath from array import array import m5 from m5.defines import buildEnv from m5.objects import * from m5.util import addToPath, fatal if buildEnv['FULL_SYSTEM']: fatal("This script requires syscall emulation mode (*_SE).") addToPath('../common') import Simulation import CacheConfig from Caches import * from cpu2000 import * # Get paths we might need. It's expected this file is in m5/configs/example. config_path = os.path.dirname(os.path.abspath(__file__)) config_root = os.path.dirname(config_path) m5_root = os.path.dirname(config_root) parser = optparse.OptionParser()
def build_test_system(np): cmdline = cmd_line_template() if buildEnv['TARGET_ISA'] == "alpha": test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0], options.ruby, cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "mips": test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0], cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "sparc": test_sys = makeSparcSystem(test_mem_mode, bm[0], cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "x86": test_sys = makeLinuxX86System(test_mem_mode, np, bm[0], options.ruby, cmdline=cmdline) elif buildEnv['TARGET_ISA'] == "arm": test_sys = makeArmSystem(test_mem_mode, options.machine_type, np, bm[0], options.dtb_filename, bare_metal=options.bare_metal, cmdline=cmdline, external_memory= options.external_memory_system, ruby=options.ruby, security=options.enable_security_extensions) if options.enable_context_switch_stats_dump: test_sys.enable_context_switch_stats_dump = True else: fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA']) # Set the cache line size for the entire system test_sys.cache_line_size = options.cacheline_size # Create a top-level voltage domain test_sys.voltage_domain = VoltageDomain(voltage = options.sys_voltage) # Create a source clock for the system and set the clock period test_sys.clk_domain = SrcClockDomain(clock = options.sys_clock, voltage_domain = test_sys.voltage_domain) # Create a CPU voltage domain test_sys.cpu_voltage_domain = VoltageDomain() # Create a source clock for the CPUs and set the clock period test_sys.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock, voltage_domain = test_sys.cpu_voltage_domain) if options.kernel is not None: test_sys.kernel = binary(options.kernel) else: print("Error: a kernel must be provided to run in full system mode") sys.exit(1) if options.script is not None: test_sys.readfile = options.script if options.lpae: test_sys.have_lpae = True if options.virtualisation: test_sys.have_virtualization = True test_sys.init_param = options.init_param # For now, assign all the CPUs to the same clock domain test_sys.cpu = [TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i) for i in range(np)] if CpuConfig.is_kvm_cpu(TestCPUClass) or CpuConfig.is_kvm_cpu(FutureClass): test_sys.kvm_vm = KvmVM() if options.ruby: bootmem = getattr(test_sys, 'bootmem', None) Ruby.create_system(options, True, test_sys, test_sys.iobus, test_sys._dma_ports, bootmem) # Create a seperate clock domain for Ruby test_sys.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock, voltage_domain = test_sys.voltage_domain) # Connect the ruby io port to the PIO bus, # assuming that there is just one such port. 
test_sys.iobus.master = test_sys.ruby._io_port.slave for (i, cpu) in enumerate(test_sys.cpu): # # Tie the cpu ports to the correct ruby system ports # cpu.clk_domain = test_sys.cpu_clk_domain cpu.createThreads() cpu.createInterruptController() cpu.icache_port = test_sys.ruby._cpu_ports[i].slave cpu.dcache_port = test_sys.ruby._cpu_ports[i].slave if buildEnv['TARGET_ISA'] in ("x86", "arm"): cpu.itb.walker.port = test_sys.ruby._cpu_ports[i].slave cpu.dtb.walker.port = test_sys.ruby._cpu_ports[i].slave if buildEnv['TARGET_ISA'] in "x86": cpu.interrupts[0].pio = test_sys.ruby._cpu_ports[i].master cpu.interrupts[0].int_master = test_sys.ruby._cpu_ports[i].slave cpu.interrupts[0].int_slave = test_sys.ruby._cpu_ports[i].master else: if options.caches or options.l2cache: # By default the IOCache runs at the system clock test_sys.iocache = IOCache(addr_ranges = test_sys.mem_ranges) test_sys.iocache.cpu_side = test_sys.iobus.master test_sys.iocache.mem_side = test_sys.membus.slave elif not options.external_memory_system: test_sys.iobridge = Bridge(delay='50ns', ranges = test_sys.mem_ranges) test_sys.iobridge.slave = test_sys.iobus.master test_sys.iobridge.master = test_sys.membus.slave # Sanity check if options.simpoint_profile: if not CpuConfig.is_noncaching_cpu(TestCPUClass): fatal("SimPoint generation should be done with atomic cpu") if np > 1: fatal("SimPoint generation not supported with more than one CPUs") for i in range(np): if options.simpoint_profile: test_sys.cpu[i].addSimPointProbe(options.simpoint_interval) if options.checker: test_sys.cpu[i].addCheckerCpu() if options.bp_type: bpClass = BPConfig.get(options.bp_type) test_sys.cpu[i].branchPred = bpClass() if options.indirect_bp_type: IndirectBPClass = \ BPConfig.get_indirect(options.indirect_bp_type) test_sys.cpu[i].branchPred.indirectBranchPred = \ IndirectBPClass() test_sys.cpu[i].createThreads() # If elastic tracing is enabled when not restoring from checkpoint and # when not fast forwarding using the atomic cpu, then check that the # TestCPUClass is DerivO3CPU or inherits from DerivO3CPU. If the check # passes then attach the elastic trace probe. # If restoring from checkpoint or fast forwarding, the code that does this for # FutureCPUClass is in the Simulation module. If the check passes then the # elastic trace probe is attached to the switch CPUs. if options.elastic_trace_en and options.checkpoint_restore == None and \ not options.fast_forward: CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, options) CacheConfig.config_cache(options, test_sys) MemConfig.config_mem(options, test_sys) return test_sys
print >> sys.stderr, "Unable to find workload for %s: %s" % ( buildEnv['TARGET_ISA'], app) sys.exit(1) elif options.cmd: multiprocesses, numThreads = get_processes(options) else: print >> sys.stderr, "No workload specified. Exiting!\n" sys.exit(1) (CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options) CPUClass.clock = options.clock CPUClass.numThreads = numThreads # Check -- do not allow SMT with multiple CPUs if options.smt and options.num_cpus > 1: fatal("You cannot use SMT with multiple CPUs!") np = options.num_cpus system = System(cpu=[CPUClass(cpu_id=i) for i in xrange(np)], physmem=SimpleMemory(range=AddrRange("512MB")), membus=CoherentBus(), mem_mode=test_mem_mode) # Sanity check if options.fastmem and (options.caches or options.l2cache): fatal("You cannot use fastmem in combination with caches!") for i in xrange(np): if options.smt: system.cpu[i].workload = multiprocesses elif len(multiprocesses) == 1:
def build_test_system(np): if buildEnv['TARGET_ISA'] == "alpha": test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0], options.ruby) elif buildEnv['TARGET_ISA'] == "mips": test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0]) elif buildEnv['TARGET_ISA'] == "sparc": test_sys = makeSparcSystem(test_mem_mode, bm[0]) elif buildEnv['TARGET_ISA'] == "x86": test_sys = makeLinuxX86System(test_mem_mode, options.num_cpus, bm[0], options.ruby) elif buildEnv['TARGET_ISA'] == "arm": test_sys = makeArmSystem(test_mem_mode, options.machine_type, bm[0], options.dtb_filename, bare_metal=options.bare_metal) if options.enable_context_switch_stats_dump: test_sys.enable_context_switch_stats_dump = True else: fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA']) # Set the cache line size for the entire system test_sys.cache_line_size = options.cacheline_size # Create a top-level voltage domain test_sys.voltage_domain = VoltageDomain(voltage=options.sys_voltage) # Create a source clock for the system and set the clock period test_sys.clk_domain = SrcClockDomain( clock=options.sys_clock, voltage_domain=test_sys.voltage_domain) # Create a CPU voltage domain test_sys.cpu_voltage_domain = VoltageDomain() # Create a source clock for the CPUs and set the clock period #test_sys.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock, # voltage_domain = # test_sys.cpu_voltage_domain) #test_sys.cpu_clk_domain = SrcClockDomain(clock = ["3GHz","2GHz","1GHz"], test_sys.cpu_clk_domain = SrcClockDomain( clock=[ "3GHz", "2.8GHz", "2.6GHz", "2.4GHz", "2.2GHz", "2.0GHz", "1.8GHz", "1.6GHz", "1.4GHz", "1.3GHz", "1.2GHz", "1.1GHz", "1GHz", "0.9GHz", "0.8GHz", "0.7GHz", "0.6GHz", "0.5GHz", "0.4GHz", "0.3GHz", "0.2GHz" ], voltage_domain=test_sys.cpu_voltage_domain, domain_id=0) if options.kernel is not None: test_sys.kernel = binary(options.kernel) if options.script is not None: test_sys.readfile = options.script if options.lpae: test_sys.have_lpae = True if options.virtualisation: test_sys.have_virtualization = True test_sys.init_param = options.init_param # For now, assign all the CPUs to the same clock domain test_sys.cpu = [ TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i) for i in xrange(np) ] if is_kvm_cpu(TestCPUClass) or is_kvm_cpu(FutureClass): test_sys.vm = KvmVM() test_sys.dvfs_handler.enable = True test_sys.dvfs_handler.domains = [test_sys.cpu_clk_domain] if options.ruby: # Check for timing mode because ruby does not support atomic accesses if not (options.cpu_type == "detailed" or options.cpu_type == "timing"): print >> sys.stderr, "Ruby requires TimingSimpleCPU or O3CPU!!" 
sys.exit(1) Ruby.create_system(options, test_sys, test_sys.iobus, test_sys._dma_ports) # Create a seperate clock domain for Ruby test_sys.ruby.clk_domain = SrcClockDomain( clock=options.ruby_clock, voltage_domain=test_sys.voltage_domain) for (i, cpu) in enumerate(test_sys.cpu): # # Tie the cpu ports to the correct ruby system ports # cpu.clk_domain = test_sys.cpu_clk_domain cpu.createThreads() cpu.createInterruptController() cpu.icache_port = test_sys.ruby._cpu_ports[i].slave cpu.dcache_port = test_sys.ruby._cpu_ports[i].slave if buildEnv['TARGET_ISA'] == "x86": cpu.itb.walker.port = test_sys.ruby._cpu_ports[i].slave cpu.dtb.walker.port = test_sys.ruby._cpu_ports[i].slave cpu.interrupts.pio = test_sys.ruby._cpu_ports[i].master cpu.interrupts.int_master = test_sys.ruby._cpu_ports[i].slave cpu.interrupts.int_slave = test_sys.ruby._cpu_ports[i].master test_sys.ruby._cpu_ports[i].access_phys_mem = True # Create the appropriate memory controllers # and connect them to the IO bus test_sys.mem_ctrls = [ TestMemClass(range=r) for r in test_sys.mem_ranges ] for i in xrange(len(test_sys.mem_ctrls)): test_sys.mem_ctrls[i].port = test_sys.iobus.master else: if options.caches or options.l2cache: # By default the IOCache runs at the system clock test_sys.iocache = IOCache(addr_ranges=test_sys.mem_ranges) test_sys.iocache.cpu_side = test_sys.iobus.master test_sys.iocache.mem_side = test_sys.membus.slave else: test_sys.iobridge = Bridge(delay='50ns', ranges=test_sys.mem_ranges) test_sys.iobridge.slave = test_sys.iobus.master test_sys.iobridge.master = test_sys.membus.slave # Sanity check if options.fastmem: if TestCPUClass != AtomicSimpleCPU: fatal("Fastmem can only be used with atomic CPU!") if (options.caches or options.l2cache): fatal("You cannot use fastmem in combination with caches!") for i in xrange(np): if options.fastmem: test_sys.cpu[i].fastmem = True if options.checker: test_sys.cpu[i].addCheckerCpu() test_sys.cpu[i].createThreads() CacheConfig.config_cache(options, test_sys) MemConfig.config_mem(options, test_sys) return test_sys
buildEnv['TARGET_ISA'], app) sys.exit(1) elif options.cmd: multiprocesses, numThreads = get_processes(options) else: print >> sys.stderr, "No workload specified. Exiting!\n" sys.exit(1) (CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options) CPUClass.numThreads = numThreads MemClass = Simulation.setMemClass(options) # Check -- do not allow SMT with multiple CPUs if options.smt and options.num_cpus > 1: fatal("You cannot use SMT with multiple CPUs!") system = System(cpu=[CPUClass(cpu_id=i) for i in range(np)], mem_mode=test_mem_mode, mem_ranges=[AddrRange(options.mem_size)], cache_line_size=options.cacheline_size) if numThreads > 1: system.multi_thread = True # Create a top-level voltage domain system.voltage_domain = VoltageDomain(voltage=options.sys_voltage) # Create a source clock for the system and set the clock period system.clk_domain = SrcClockDomain(clock=options.sys_clock, voltage_domain=system.voltage_domain)
def config_mem(options, system): """ Create the memory controllers based on the options and attach them. If requested, we make a multi-channel configuration of the selected memory controller class by creating multiple instances of the specific class. The individual controllers have their parameters set such that the address range is interleaved between them. """ if options.tlm_memory: system.external_memory = m5.objects.ExternalSlave( port_type="tlm", port_data=options.tlm_memory, port=system.membus.master, addr_ranges=system.mem_ranges) system.kernel_addr_check = False return if options.external_memory_system: system.external_memory = m5.objects.ExternalSlave( port_type=options.external_memory_system, port_data="init_mem0", port=system.membus.master, addr_ranges=system.mem_ranges) system.kernel_addr_check = False return nbr_mem_ctrls = options.mem_channels import math from m5.util import fatal intlv_bits = int(math.log(nbr_mem_ctrls, 2)) if 2**intlv_bits != nbr_mem_ctrls: fatal("Number of memory channels must be a power of 2") cls = get(options.mem_type) mem_ctrls = [] # The default behaviour is to interleave memory channels on 128 # byte granularity, or cache line granularity if larger than 128 # byte. This value is based on the locality seen across a large # range of workloads. intlv_size = max(128, system.cache_line_size.value) # For every range (most systems will only have one), create an # array of controllers and set their parameters to match their # address mapping in the case of a DRAM for r in system.mem_ranges: for i in xrange(nbr_mem_ctrls): mem_ctrl = create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, intlv_size) # Set the number of ranks based on the command-line # options if it was explicitly set if issubclass(cls, m5.objects.DRAMCtrl) and \ options.mem_ranks: mem_ctrl.ranks_per_channel = options.mem_ranks mem_ctrls.append(mem_ctrl) system.mem_ctrls = mem_ctrls # Connect the controllers to the membus for i in xrange(len(system.mem_ctrls)): system.mem_ctrls[i].port = system.membus.master
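# A minimal standalone sketch of what the interleaving granularity above
# means: with intlv_size bytes per slice, consecutive slices rotate across the
# channels. This ignores the optional XOR-based hashing a real AddrRange may
# also apply; the 4-channel figure is an assumption for illustration.
intlv_size = 128
nbr_mem_ctrls = 4

def channel_of(addr):
    # the channel-select bits sit directly above the interleaving granularity
    return (addr // intlv_size) % nbr_mem_ctrls

assert channel_of(0x000) == 0
assert channel_of(0x080) == 1
assert channel_of(0x200) == 0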
def config_mem(options, system): """ Create the memory controllers based on the options and attach them. If requested, we make a multi-channel configuration of the selected memory controller class by creating multiple instances of the specific class. The individual controllers have their parameters set such that the address range is interleaved between them. """ if (options.mem_type == "HMC_2500_1x32"): HMChost = HMC.config_host_hmc(options, system) HMC.config_hmc(options, system, HMChost.hmc_host) subsystem = system.hmc_dev xbar = system.hmc_dev.xbar else: subsystem = system xbar = system.membus if options.tlm_memory: system.external_memory = m5.objects.ExternalSlave( port_type="tlm_slave", port_data=options.tlm_memory, port=system.membus.master, addr_ranges=system.mem_ranges) system.kernel_addr_check = False return if options.external_memory_system: subsystem.external_memory = m5.objects.ExternalSlave( port_type=options.external_memory_system, port_data="init_mem0", port=xbar.master, addr_ranges=system.mem_ranges) subsystem.kernel_addr_check = False return nbr_mem_ctrls = options.mem_channels import math from m5.util import fatal intlv_bits = int(math.log(nbr_mem_ctrls, 2)) if 2**intlv_bits != nbr_mem_ctrls: fatal("Number of memory channels must be a power of 2") cls = get(options.mem_type) mem_ctrls = [] if options.elastic_trace_en and not issubclass(cls, \ m5.objects.SimpleMemory): fatal("When elastic trace is enabled, configure mem-type as " "simple-mem.") # The default behaviour is to interleave memory channels on 128 # byte granularity, or cache line granularity if larger than 128 # byte. This value is based on the locality seen across a large # range of workloads. intlv_size = max(128, system.cache_line_size.value) # For every range (most systems will only have one), create an # array of controllers and set their parameters to match their # address mapping in the case of a DRAM for r in system.mem_ranges: for i in xrange(nbr_mem_ctrls): mem_ctrl = create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, intlv_size) # Set the number of ranks based on the command-line # options if it was explicitly set if issubclass(cls, m5.objects.DRAMCtrl) and \ options.mem_ranks: mem_ctrl.ranks_per_channel = options.mem_ranks if options.elastic_trace_en: mem_ctrl.latency = '1ns' print "For elastic trace, over-riding Simple Memory " \ "latency to 1ns." mem_ctrls.append(mem_ctrl) subsystem.mem_ctrls = mem_ctrls # Connect the controllers to the membus for i in xrange(len(subsystem.mem_ctrls)): if (options.mem_type == "HMC_2500_1x32"): subsystem.mem_ctrls[i].port = xbar[i / 4].master else: subsystem.mem_ctrls[i].port = xbar.master
def config_mem(options, system): """ Create the memory controllers based on the options and attach them. If requested, we make a multi-channel configuration of the selected memory controller class by creating multiple instances of the specific class. The individual controllers have their parameters set such that the address range is interleaved between them. """ # Mandatory options opt_mem_type = options.mem_type opt_mem_channels = options.mem_channels # Optional options opt_tlm_memory = getattr(options, "tlm_memory", None) opt_external_memory_system = getattr(options, "external_memory_system", None) opt_elastic_trace_en = getattr(options, "elastic_trace_en", False) opt_mem_ranks = getattr(options, "mem_ranks", None) if opt_mem_type == "HMC_2500_1x32": HMChost = HMC.config_hmc_host_ctrl(options, system) HMC.config_hmc_dev(options, system, HMChost.hmc_host) subsystem = system.hmc_dev xbar = system.hmc_dev.xbar else: subsystem = system xbar = system.membus #system.mem_ranges.add if opt_tlm_memory: system.external_memory = m5.objects.ExternalSlave( port_type="tlm_slave", port_data=opt_tlm_memory, port=system.membus.master, addr_ranges=system.mem_ranges) system.kernel_addr_check = False return if opt_external_memory_system: subsystem.external_memory = m5.objects.ExternalSlave( port_type=opt_external_memory_system, port_data="init_mem0", port=xbar.master, addr_ranges=system.mem_ranges) subsystem.kernel_addr_check = False return nbr_mem_ctrls = opt_mem_channels import math from m5.util import fatal intlv_bits = int(math.log(nbr_mem_ctrls, 2)) if 2**intlv_bits != nbr_mem_ctrls: fatal("Number of memory channels must be a power of 2") cls = get(opt_mem_type) mem_ctrls = [] if opt_elastic_trace_en and not issubclass(cls, m5.objects.SimpleMemory): fatal("When elastic trace is enabled, configure mem-type as " "simple-mem.") # The default behaviour is to interleave memory channels on 128 # byte granularity, or cache line granularity if larger than 128 # byte. This value is based on the locality seen across a large # range of workloads. intlv_size = max(128, system.cache_line_size.value) # For every range (most systems will only have one), create an # array of controllers and set their parameters to match their # address mapping in the case of a DRAM # @PIM # if we use PIM, we should get the memory ranges in order to # differentiate phyical memory and in-memory logic/processors addr_base = 0 for r in system.mem_ranges: for i in xrange(nbr_mem_ctrls): mem_ctrl = create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, intlv_size) # Set the number of ranks based on the command-line # options if it was explicitly set if issubclass(cls, m5.objects.DRAMCtrl) and opt_mem_ranks: mem_ctrl.ranks_per_channel = opt_mem_ranks if opt_elastic_trace_en: mem_ctrl.latency = '1ns' print("For elastic trace, over-riding Simple Memory " "latency to 1ns.") if hasattr(options, 'enable_pim') and options.enable_pim: mem_ctrl.cpu_type = options.cpu_type mem_ctrl.coherence_granularity = options.coherence_granularity mem_ctrls.append(mem_ctrl) # @PIM # If the memory consists of more than two controller, the ranges # may be separated. 
        # Thus, we should track the highest end address across all ranges so
        # that the PIM logic can be placed just above physical memory.
        if long(r.end) > addr_base:
            addr_base = r.end

    subsystem.mem_ctrls = mem_ctrls

    if options.mem_type.startswith("HMC"):
        addr_base = int(MemorySize(options.hmc_dev_vault_size)) * \
            options.hmc_dev_num_vaults - 1
        print(addr_base)

    # @PIM
    # define in-memory processing units here
    addr_base = addr_base + 1

    pim_enable = getattr(options, 'enable_pim', False)

    if pim_enable:
        print("Enable PIM simulation in the system.")
        pim_type = options.pim_type
        num_kernels = options.num_pim_kernels
        num_processors = options.num_pim_processors
        num_pim_logic = num_kernels + num_processors

        if num_pim_logic <= 0:
            fatal("The number of PIM kernels/processors cannot be zero "
                  "when PIM is enabled.")

        if options.mem_type.startswith("HMC"):
            if num_kernels > 0:
                num_kernels = 16
                num_processors = 0
            else:
                num_processors = 16
                num_kernels = 0

        system.pim_type = pim_type

        for cpu in system.cpu:
            # let host-side processors know the address of the PIM logic
            cpu.pim_base_addr = addr_base

        # memory contains kernels
        if pim_type != "cpu" and num_kernels > 0:
            pim_kernels = []
            print("Creating PIM kernels...")
            for pid in range(num_kernels):
                if options.kernel_type == "adder":
                    _kernel = PIMAdder()
                elif options.kernel_type == "multiplier":
                    _kernel = PIMMultiplier()
                elif options.kernel_type == "divider":
                    _kernel = PIMDivider()
                else:
                    fatal("no pim kernel type specified.")

                vd = VoltageDomain(voltage="1.0V")
                _kernel.clk_domain = SrcClockDomain(clock="1GHz",
                                                    voltage_domain=vd)
                _kernel.id = pid
                # Currently, we use only a single address for accessing a
                # PIM kernel. Detailed PIM information is defined inside the
                # packet at mem/packet.hh(.cc)
                _kernel.addr_ranges = AddrRange(addr_base + pid,
                                                addr_base + pid)
                _kernel.addr_base = addr_base
                if options.mem_type.startswith("DDR"):
                    # connect to the memory bus if the memory is DRAM
                    _kernel.port = xbar.slave
                    _kernel.mem_port = xbar.master
                if options.mem_type.startswith("HMC"):
                    _kernel.port = system.membus.slave
                    _kernel.mem_port = system.membus.master
                pim_kernels.append(_kernel)
            system.pim_kernels = pim_kernels

        # memory contains processors
        if pim_type != "kernel" and num_processors > 0:
            system.pim_cpu = TimingSimpleCPU(ispim=True,
                                             total_host_cpu=options.num_cpus,
                                             switched_out=True)
            pim_vd = VoltageDomain(voltage="1.0V")
            system.pim_cpu.clk_domain = SrcClockDomain(clock='1GHz',
                                                       voltage_domain=pim_vd)
            print("Creating PIM processors...")
            system.pim_cpu.icache_port = system.membus.slave
            system.pim_cpu.dcache_port = system.membus.slave
            system.pim_cpu.workload = system.cpu[0].workload[0]
            system.pim_cpu.isa = [default_isa_class()]

        if pim_type == "hybrid":
            if not (num_kernels > 0 and num_processors > 0):
                fatal("PIM logic is set to hybrid, but both kernels and "
                      "processors must be configured.")

    # Connect the controllers to the membus
    for i in xrange(len(subsystem.mem_ctrls)):
        if opt_mem_type == "HMC_2500_1x32":
            subsystem.mem_ctrls[i].port = xbar[i / 4].master
            # Set memory device size. There is an independent controller for
            # each vault. All vaults are same size.
            subsystem.mem_ctrls[i].device_size = options.hmc_dev_vault_size
        else:
            subsystem.mem_ctrls[i].port = xbar.master
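# A minimal standalone sketch of the PIM address map set up above: each PIM
# kernel claims the single physical address addr_base + pid, so the host side
# can select kernel 'pid' by targeting that address. The helper and the
# example base address are hypothetical.
def pim_kernel_addr(addr_base, pid):
    # address that the routing above will steer to PIM kernel 'pid'
    return addr_base + pid

assert pim_kernel_addr(0x100000000, 3) == 0x100000003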
def __init__(self):
    if buildEnv['PROTOCOL'] != 'MOESI_hammer':
        fatal("This system assumes MOESI_hammer!")

    super(MOESIHammerCache, self).__init__()
# Check for timing mode because ruby does not support atomic accesses
if not (options.cpu_type == "detailed" or options.cpu_type == "timing"):
    print >> sys.stderr, "Ruby requires TimingSimpleCPU or O3CPU!!"
    sys.exit(1)

(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options)
CPUClass.clock = options.clock

if buildEnv['TARGET_ISA'] == "alpha":
    system = makeLinuxAlphaRubySystem(test_mem_mode, bm[0])
elif buildEnv['TARGET_ISA'] == "x86":
    system = makeLinuxX86System(test_mem_mode, options.num_cpus, bm[0], True)
    #system = addExternalDisk(system)
    Simulation.setWorkCountOptions(system, options)
else:
    fatal("incapable of building non-alpha or non-x86 full system!")

if options.mem_size:
    bm[0] = SysConfig(disk=bm[0].disk, mem=options.mem_size,
                      script=bm[0].script())

if options.kernel is not None:
    system.kernel = binary(options.kernel)

if options.script is not None:
    system.readfile = options.script

# Added by Tianyun for adding the bm binary path to the command line
if options.obj is None:
    print "Please specify the benchmark binary"
    sys.exit(1)
else:
    system.obj = options.obj
# Done addition by Tianyun
options.mem_type = "SimpleMemory" #options.total_mem_size = "2112MB" #options.total_mem_size = "3136MB" options.num_dev_dirs = 0 #options.num_dirs = 1 options.mem_channels = options.num_dirs; options.cacheline_size = 128 assert(options.gpu_num_l2caches == 1) if args: print "Error: script doesn't take any positional arguments" sys.exit(1) if buildEnv['TARGET_ISA'] != "arm": fatal("gem5-gpu : this config works with an arm system!") if buildEnv['TARGET_ISA'] != "arm": fatal("This is an ARM config script, please modify to use with other architectures!") #if options.cpu_type != "timing" and options.cpu_type != "TimingSimpleCPU" \ # and options.cpu_type != "detailed" and options.cpu_type != "DerivO3CPU": # print "Warning: gem5-gpu only known to work with timing and detailed CPUs: Proceed at your own risk!" (CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options) # Match the memories with the CPUs, based on the options for the test system MemClass = Simulation.setMemClass(options) print "Using %s memory model" % options.mem_type #if(options.???.lower().count('emm')): #bm?