def create(args):
    # System
    CpuClass, mem_mode, _ = Simulation.setCPUClass(args)
    sys_cfg = Benchmarks.SysConfig(args.script, args.mem_size)
    system = FSConfig.makeArmSystem(mem_mode,
                                    "VExpress_GEM5_V2",
                                    args.num_cpus,
                                    sys_cfg,
                                    bare_metal=True,
                                    security=True)
    system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
    system.clk_domain = SrcClockDomain(clock=args.sys_clock,
                                       voltage_domain=system.voltage_domain)
    system.highest_el_is_64 = True
    system.have_virtualization = True
    system.workload.object_file = args.kernel

    # CPU cluster
    system.cpu_voltage_domain = VoltageDomain()
    system.cpu_clk_domain = SrcClockDomain(
        clock=args.cpu_clock,
        voltage_domain=system.cpu_voltage_domain)
    system.cpu = [
        CpuClass(clk_domain=system.cpu_clk_domain, cpu_id=i)
        for i in range(args.num_cpus)
    ]
    for cpu in system.cpu:
        cpu.createThreads()
        # (gem5 v20.1) Disable FEAT_VHE, which otherwise prevents booting
        features = cpu.isa[0].id_aa64mmfr1_el1.getValue()
        cpu.isa[0].id_aa64mmfr1_el1 = features & ~0xf00

    CacheConfig.config_cache(args, system)

    # Devices
    system.realview.atp_adapter = ProfileGen(config_files=args.atp_file,
                                             exit_when_done=False,
                                             init_only=True,
                                             disable_watchdog=True,
                                             disable_mem_check=True)
    system.realview.atp_device = ATPDevice(pio_addr=0x2b500000,
                                           interrupt=ArmSPI(num=104),
                                           atp_id="STREAM")
    system.realview.attachSmmu([system.realview.atp_device], system.membus)
    # (gem5 v20.1) Ensure 128 CMDQ entries for compatibility with Linux v5.4
    system.realview.smmu.smmu_idr1 = 0x00E00000
    # (gem5 v20.2+) Enable the SMMUv3 interrupt interface to boot Linux
    if hasattr(system.realview.smmu, "irq_interface_enable"):
        system.realview.smmu.irq_interface_enable = True
    connect_adapter(system.realview.atp_adapter, system.realview.smmu)

    if args.disk_image:
        system.disk = [
            PciVirtIO(vio=VirtIOBlock(image=create_cow_image(disk)))
            for disk in args.disk_image
        ]
        for disk in system.disk:
            system.realview.attachPciDevice(disk, system.iobus)

    # Memory
    MemConfig.config_mem(args, system)

    return system
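# A minimal sketch of how create() could be driven from a top-level script.
# The --atp-file flag is an assumption inferred from args.atp_file above; the
# m5/Options/Simulation imports follow the usual gem5 config layout.
import argparse

import m5
from m5.objects import Root

from common import Options, Simulation

parser = argparse.ArgumentParser()
Options.addCommonOptions(parser)
Options.addFSOptions(parser)
parser.add_argument("--atp-file", action="append", type=str,
                    help="AMBA ATP configuration file(s)")  # assumed flag
args = parser.parse_args()

root = Root(full_system=True, system=create(args))
m5.instantiate()
print("Exiting @ tick %d: %s" % (m5.curTick(), m5.simulate().getCause()))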
else:
    fatal("This test is only for FPGA in Ruby. Please set --ruby.\n")

(options, args) = parser.parse_args()

if args:
    print("Error: script doesn't take any positional arguments")
    sys.exit(1)

numThreads = 1

process1 = LiveProcess()
process1.pid = 1100
process1.cmd = ['tests/test-progs/polybench-c-4.2/fdtd_ref']

(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options)
CPUClass.numThreads = numThreads

np = options.num_cpus
system = System(cpu=[DerivO3CPU() for i in range(np)],
                # Alternative: cpu=[TimingSimpleCPU() for i in range(np)],
                mem_mode='timing',
                mem_ranges=[AddrRange('512MB')],
                cache_line_size=64)
system.fpga = [FpgaCPU() for i in range(options.num_fpgas)]

system.voltage_domain = VoltageDomain(voltage=options.sys_voltage)
system.clk_domain = SrcClockDomain(clock=options.sys_clock,
                                   voltage_domain=system.voltage_domain)
system.cpu_voltage_domain = VoltageDomain()
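# The excerpt stops right after creating the CPU voltage domain. A minimal
# sketch of how the per-CPU clock domain hookup typically continues in gem5
# configs (assumed here, not taken from this script):
system.cpu_clk_domain = SrcClockDomain(
    clock=options.cpu_clock,
    voltage_domain=system.cpu_voltage_domain)
for cpu in system.cpu:
    cpu.clk_domain = system.cpu_clk_domain
    cpu.createThreads()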
if args:
    print("Error: script doesn't take any positional arguments")
    sys.exit(1)

numThreads = 1

if options.cpu_type != "TraceCPU":
    fatal("This is a script for elastic trace replay simulation, use "
          "--cpu-type=TraceCPU\n")

if options.num_cpus > 1:
    fatal("This script does not support multi-processor trace replay.\n")

# In this case FutureClass will be None as there is no fast forwarding or
# switching
(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options)
CPUClass.numThreads = numThreads

system = System(cpu=CPUClass(cpu_id=0),
                mem_mode=test_mem_mode,
                mem_ranges=[AddrRange(options.mem_size)],
                cache_line_size=options.cacheline_size)

# Create a top-level voltage domain
system.voltage_domain = VoltageDomain(voltage=options.sys_voltage)

# Create a source clock for the system. This is used as the clock period for
# the xbar and memory
system.clk_domain = SrcClockDomain(clock=options.sys_clock,
                                   voltage_domain=system.voltage_domain)
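# A minimal sketch of how the TraceCPU is usually pointed at its elastic
# traces further down in this kind of script. The --inst-trace-file and
# --data-trace-file options and the instTraceFile/dataTraceFile parameters
# follow gem5's TraceCPU replay flow; treat the exact spelling as an
# assumption here.
system.cpu.instTraceFile = options.inst_trace_file
system.cpu.dataTraceFile = options.data_trace_file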
    return drive_sys

# Add args
parser = argparse.ArgumentParser()
Options.addCommonOptions(parser)
Options.addFSOptions(parser)

# Add the ruby specific and protocol specific args
if '--ruby' in sys.argv:
    Ruby.define_options(parser)

args = parser.parse_args()

# system under test can be any CPU
(TestCPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)

# Match the memories with the CPUs, based on the options for the test system
TestMemClass = Simulation.setMemClass(args)

if args.benchmark:
    try:
        bm = Benchmarks[args.benchmark]
    except KeyError:
        print("Error: benchmark %s has not been defined." % args.benchmark)
        print("Valid benchmarks are: %s" % DefinedBenchmarks)
        sys.exit(1)
else:
    if args.dual:
        bm = [SysConfig(disks=args.disk_image,
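# For the --dual case above, fs.py-style scripts pair the test and drive
# systems under a single root via FSConfig's makeDualRoot helper. A minimal
# sketch, assuming the usual build_test_system()/build_drive_system() split:
test_sys = build_test_system(np)
drive_sys = build_drive_system(np)
root = makeDualRoot(True, test_sys, drive_sys, args.etherdump)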
parser.add_argument("--bare-metal", action="store_true",
                    help="Provide the raw system without the linux specific bits")
parser.add_argument("--dtb-filename", action="store", type=str,
                    help="Specifies device tree blob file to use with device-tree-"
                         "enabled kernels")
parser.add_argument("--virtio-rng", action="store_true",
                    help="Enable VirtIORng device")

# ---------------------------- Parse Options --------------------------- #
args = parser.parse_args()

# CPU and Memory
(CPUClass, mem_mode, FutureClass) = Simulation.setCPUClass(args)
MemClass = Simulation.setMemClass(args)

np = args.num_cpus

# ---------------------------- Setup System ---------------------------- #
# Default Setup
system = System()
mdesc = SysConfig(disks=args.disk_image,
                  rootdev=args.root_device,
                  mem=args.mem_size,
                  os_type=args.os_type)
system.mem_mode = mem_mode
system.mem_ranges = [AddrRange(start=0x80000000, size=mdesc.mem())]

if args.bare_metal:
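    # (The excerpt cuts off here. A minimal sketch of how this branch
    # typically continues in gem5's riscv fs_linux.py flow, treated as an
    # assumption: the bare-metal path boots a raw bootloader workload while
    # the default path boots a Linux kernel.)
    system.workload = RiscvBareMetal()
    system.workload.bootloader = args.kernel
else:
    system.workload = RiscvLinux()
    system.workload.object_file = args.kernel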
            else:
                exec("workload = %s(buildEnv['TARGET_ISA'], 'linux', '%s')" % (
                        app, options.spec_input))
            multiprocesses.append(workload.makeLiveProcess())
        except:
            print("Unable to find workload for %s: %s" % (
                    buildEnv['TARGET_ISA'], app), file=sys.stderr)
            sys.exit(1)
elif options.cmd:
    multiprocesses, numThreads = get_processes(options)
else:
    print("No workload specified. Exiting!\n", file=sys.stderr)
    sys.exit(1)

(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options)
CPUClass.numThreads = numThreads

# Check -- do not allow SMT with multiple CPUs
if options.smt and options.num_cpus > 1:
    fatal("You cannot use SMT with multiple CPUs!")

np = options.num_cpus
system = System(cpu=[CPUClass(cpu_id=i) for i in range(np)],
                mem_mode=test_mem_mode,
                mem_ranges=[AddrRange(options.mem_size)],
                cache_line_size=options.cacheline_size)

if numThreads > 1:
    system.multi_thread = True
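# get_processes() is referenced above but not shown in this excerpt. A
# minimal sketch of the kind of helper se.py uses, assuming semicolon-
# separated commands in options.cmd; the details are illustrative only:
import os
from m5.objects import Process

def get_processes(options):
    multiprocesses = []
    for idx, wrkld in enumerate(options.cmd.split(';')):
        process = Process(pid=100 + idx)
        process.executable = wrkld
        process.cwd = os.getcwd()
        process.cmd = [wrkld] + \
            (options.options.split() if options.options else [])
        multiprocesses.append(process)
    return multiprocesses, 1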
Options.addFSOptions(parser)
Options.addSPMOptions(parser)

# Add the ruby specific and protocol specific options
if '--ruby' in sys.argv:
    Ruby.define_options(parser)

(options, args) = parser.parse_args()

if args:
    print("Error: script doesn't take any positional arguments")
    sys.exit(1)

# system under test can be any CPU
(TestCPUClass, test_mem_mode, FutureClass, FutureClass2) = \
    Simulation.setCPUClass(options)

# Match the memories with the CPUs, based on the options for the test system
TestMemClass = Simulation.setMemClass(options)

if options.benchmark:
    try:
        bm = Benchmarks[options.benchmark]
    except KeyError:
        print("Error: benchmark %s has not been defined." % options.benchmark)
        print("Valid benchmarks are: %s" % DefinedBenchmarks)
        sys.exit(1)
else:
    if options.dual:
        bm = [SysConfig(disk=options.disk_image,
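# The Benchmarks lookup above resolves names defined in
# configs/common/Benchmarks.py, where each entry maps to a list of SysConfig
# objects, one per simulated system (two for dual setups). A hypothetical
# entry for illustration:
Benchmarks['MyDualWorkload'] = [
    SysConfig('netperf-stream-client.rcS', '512MB'),
    SysConfig('netperf-server.rcS', '512MB'),
]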
def makeGpuFSSystem(args):
    # Boot options are standard gem5 options plus:
    # - Framebuffer device emulation 0 to reduce driver code paths.
    # - Blacklist amdgpu as it cannot (currently) load in KVM CPU.
    # - Blacklist psmouse as amdgpu driver adds proprietary commands that
    #   cause gem5 to panic.
    boot_options = ['earlyprintk=ttyS0', 'console=ttyS0,9600', 'lpj=7999923',
                    'root=/dev/sda1',
                    'drm_kms_helper.fbdev_emulation=0',
                    'modprobe.blacklist=amdgpu',
                    'modprobe.blacklist=psmouse']
    cmdline = ' '.join(boot_options)

    if MemorySize(args.mem_size) < MemorySize('2GB'):
        panic("Need at least 2GB of system memory to load amdgpu module")

    # Use the common FSConfig to set up a Linux X86 System
    (TestCPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
    bm = SysConfig(disks=[args.disk_image], mem=args.mem_size)
    system = makeLinuxX86System(test_mem_mode, args.num_cpus, bm, True,
                                cmdline=cmdline)
    system.workload.object_file = binary(args.kernel)

    # Set the cache line size for the entire system.
    system.cache_line_size = args.cacheline_size

    # Create a top-level voltage and clock domain.
    system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
    system.clk_domain = SrcClockDomain(clock=args.sys_clock,
                                       voltage_domain=system.voltage_domain)

    # Create a CPU voltage and clock domain.
    system.cpu_voltage_domain = VoltageDomain()
    system.cpu_clk_domain = SrcClockDomain(
        clock=args.cpu_clock,
        voltage_domain=system.cpu_voltage_domain)

    # Setup VGA ROM region
    system.shadow_rom_ranges = [AddrRange(0xc0000, size=Addr('128kB'))]

    # Create specified number of CPUs. GPUFS really only needs one.
    system.cpu = [TestCPUClass(clk_domain=system.cpu_clk_domain, cpu_id=i)
                  for i in range(args.num_cpus)]

    if ObjectList.is_kvm_cpu(TestCPUClass) or \
       ObjectList.is_kvm_cpu(FutureClass):
        system.kvm_vm = KvmVM()

    # Create AMDGPU and attach to southbridge
    shader = createGPU(system, args)
    connectGPU(system, args)

    # This arbitrary address is something in the X86 I/O hole
    hsapp_gpu_map_paddr = 0xe00000000
    gpu_hsapp = HSAPacketProcessor(pioAddr=hsapp_gpu_map_paddr,
                                   numHWQueues=args.num_hw_queues)
    dispatcher = GPUDispatcher()
    gpu_cmd_proc = GPUCommandProcessor(hsapp=gpu_hsapp,
                                       dispatcher=dispatcher)
    shader.dispatcher = dispatcher
    shader.gpu_cmd_proc = gpu_cmd_proc

    # GPU, HSAPP, and GPUCommandProc are DMA devices
    system._dma_ports.append(gpu_hsapp)
    system._dma_ports.append(gpu_cmd_proc)
    system._dma_ports.append(system.pc.south_bridge.gpu)

    gpu_hsapp.pio = system.iobus.mem_side_ports
    gpu_cmd_proc.pio = system.iobus.mem_side_ports
    system.pc.south_bridge.gpu.pio = system.iobus.mem_side_ports

    # Create Ruby system using Ruby.py for now
    Ruby.create_system(args, True, system, system.iobus, system._dma_ports)

    # Create a separate clock domain for Ruby
    system.ruby.clk_domain = SrcClockDomain(
        clock=args.ruby_clock,
        voltage_domain=system.voltage_domain)

    for (i, cpu) in enumerate(system.cpu):
        # Tie the cpu ports to the correct ruby system ports
        cpu.clk_domain = system.cpu_clk_domain
        cpu.createThreads()
        cpu.createInterruptController()
        system.ruby._cpu_ports[i].connectCpuPorts(cpu)

    # The shader core will be whatever is after the CPU cores are accounted for
    shader_idx = args.num_cpus
    system.cpu.append(shader)

    gpu_port_idx = len(system.ruby._cpu_ports) \
                   - args.num_compute_units - args.num_sqc \
                   - args.num_scalar_cache
    gpu_port_idx = gpu_port_idx - args.num_cp * 2

    # Connect token ports. For this we need to search through the list of all
    # sequencers, since the TCP coalescers will not necessarily be first. Only
    # TCP coalescers use a token port for back pressure.
    token_port_idx = 0
    for i in range(len(system.ruby._cpu_ports)):
        if isinstance(system.ruby._cpu_ports[i], VIPERCoalescer):
            system.cpu[shader_idx].CUs[token_port_idx].gmTokenPort = \
                system.ruby._cpu_ports[i].gmTokenPort
            token_port_idx += 1

    wavefront_size = args.wf_size
    for i in range(args.num_compute_units):
        # The pipeline issues wavefront_size number of uncoalesced requests
        # in one GPU issue cycle. Hence wavefront_size mem ports.
        for j in range(wavefront_size):
            system.cpu[shader_idx].CUs[i].memory_port[j] = \
                system.ruby._cpu_ports[gpu_port_idx].in_ports[j]
        gpu_port_idx += 1

    for i in range(args.num_compute_units):
        if i > 0 and not i % args.cu_per_sqc:
            gpu_port_idx += 1
        system.cpu[shader_idx].CUs[i].sqc_port = \
            system.ruby._cpu_ports[gpu_port_idx].in_ports
    gpu_port_idx = gpu_port_idx + 1

    for i in range(args.num_compute_units):
        if i > 0 and not i % args.cu_per_scalar_cache:
            gpu_port_idx += 1
        system.cpu[shader_idx].CUs[i].scalar_port = \
            system.ruby._cpu_ports[gpu_port_idx].in_ports
    gpu_port_idx = gpu_port_idx + 1

    return system
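# A minimal sketch of how makeGpuFSSystem() is typically driven from the
# top-level runfs-style script. The Root/m5.instantiate/m5.simulate calls are
# standard gem5; treat the surrounding wiring as an assumption:
import m5
from m5.objects import Root

system = makeGpuFSSystem(args)
root = Root(full_system=True, system=system)
m5.instantiate()
exit_event = m5.simulate()
print("Exiting @ tick %d: %s" % (m5.curTick(), exit_event.getCause()))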