Beispiel #1
0
def setup_memory_controllers(system, ruby, dir_cntrls, options):
    """Create memory controllers for each Ruby directory controller.

    Configures the Ruby block size and the NUMA interleaving bit, then
    builds one memory controller per (directory, address range) pair.
    On non-ARM targets an IOXBar is inserted when the system has more
    than one memory range; on ARM everything is routed through the
    system iobus instead.
    """
    ruby.block_size_bytes = options.cacheline_size
    # NOTE(review): the physical address width is hard-coded to 48 bits here.
    ruby.memory_size_bits = 48
    block_size_bits = int(math.log(options.cacheline_size, 2))

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        dir_bits = int(math.log(options.num_dirs, 2))
        numa_bit = block_size_bits + dir_bits - 1

    index = 0
    mem_ctrls = []
    crossbars = []

    # Sets bits to be used for interleaving.  Creates memory controllers
    # attached to a directory controller.  A separate controller is created
    # for each address range as the abstract memory can handle only one
    # contiguous address range as of now.
    for dir_cntrl in dir_cntrls:
        dir_cntrl.directory.numa_high_bit = numa_bit

        crossbar = None
        if buildEnv['TARGET_ISA'] != "arm":
            # Multiple memory ranges share the single directory memory port
            # through a dedicated crossbar.
            if len(system.mem_ranges) > 1:
                crossbar = IOXBar()
                crossbars.append(crossbar)
                dir_cntrl.memory = crossbar.slave
        else:
            # ARM: connect the directory to the iobus crossbar instead.
            dir_cntrl.memory = system.iobus.slave

        for r in system.mem_ranges:
            mem_ctrl = MemConfig.create_mem_ctrl(
                MemConfig.get(options.mem_type), r, index, options.num_dirs,
                int(math.log(options.num_dirs, 2)), options.cacheline_size)

            mem_ctrls.append(mem_ctrl)

            if buildEnv['TARGET_ISA'] != "arm":
                if crossbar != None:
                    mem_ctrl.port = crossbar.master
                else:
                    # Single range: the controller connects directly to the
                    # directory's memory port.
                    mem_ctrl.port = dir_cntrl.memory
            else:
                # ARM: memory controllers hang off the iobus.
                mem_ctrl.port = system.iobus.master

        index += 1

    system.mem_ctrls = mem_ctrls

    if buildEnv['TARGET_ISA'] != "arm":
        if len(crossbars) > 0:
            ruby.crossbars = crossbars
Beispiel #2
0
def setup_memory_controllers(system, ruby, dir_cntrls, options):
    """Attach one memory controller per (directory, address range) pair.

    Sets the Ruby block size and NUMA interleaving bit, then wires each
    directory controller either directly to its memory controller or,
    when the system exposes several memory ranges, through a
    NoncoherentXBar shared by that directory's controllers.
    """
    ruby.block_size_bytes = options.cacheline_size
    ruby.memory_size_bits = 48
    block_size_bits = int(math.log(options.cacheline_size, 2))

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # Default: the directory-select bits sit just above the block
        # offset, and the NUMA bit is the highest of those bits.
        dir_bits = int(math.log(options.num_dirs, 2))
        numa_bit = block_size_bits + dir_bits - 1

    mem_ctrls = []
    crossbars = []

    # One controller per address range, since the abstract memory can only
    # back a single contiguous range at the moment.
    for index, dir_cntrl in enumerate(dir_cntrls):
        dir_cntrl.directory.numa_high_bit = numa_bit

        crossbar = None
        if len(system.mem_ranges) > 1:
            # Several ranges share one directory port via a crossbar.
            crossbar = NoncoherentXBar()
            crossbars.append(crossbar)
            dir_cntrl.memory = crossbar.slave

        for mem_range in system.mem_ranges:
            ctrl = MemConfig.create_mem_ctrl(
                MemConfig.get(options.mem_type), mem_range, index,
                options.num_dirs, int(math.log(options.num_dirs, 2)),
                options.cacheline_size)
            mem_ctrls.append(ctrl)

            if crossbar is not None:
                ctrl.port = crossbar.master
            else:
                ctrl.port = dir_cntrl.memory

    system.mem_ctrls = mem_ctrls

    if crossbars:
        ruby.crossbars = crossbars
Beispiel #3
0
def addTHNVMOptions(parser):
    """Register the THNVM (hybrid DRAM/NVM) command-line options."""
    parser.add_option(
        "--dram-type",
        type="choice",
        default="DDR3_1600_x64",
        choices=MemConfig.mem_names(),
        help="type of DRAM to use")
    parser.add_option(
        "--nvm-type",
        type="choice",
        default="DDR3_1600_x64_PCM",
        choices=MemConfig.mem_names(),
        help="type of NVM to use")
    parser.add_option(
        "--block-bits",
        type="int",
        default=6,
        help="number of bits of a block in the block remapping scheme")
    parser.add_option(
        "--page-bits",
        type="int",
        default=12,
        help="number of bits of a page in the page writeback scheme")
    parser.add_option(
        "--btt-length",
        type="int",
        default=0,
        help="number of BTT entries")
    parser.add_option(
        "--ptt-length",
        type="int",
        default=0,
        help="number of PTT entries")
Beispiel #4
0
def config_hybrid_mem(options, system):
    """
    Assign proper address ranges for DRAM and NVM controllers.
    Create memory controllers and add their shared bus to the system.
    """
    system.thnvm_bus = VirtualXBar()
    ctrls = []

    # Interleave on 128-byte granularity, or on cache-line granularity
    # when the line is larger than 128 bytes; this value reflects the
    # locality observed across a large range of workloads.
    intlv_size = max(128, system.cache_line_size.value)

    total_size = Addr(options.mem_size)
    dram_size = pow(2, options.page_bits) * options.ptt_length

    # NVM backs the low part of the address space, if any remains below
    # the DRAM region.
    if dram_size < total_size.value:
        nvm_cls = MemConfig.get(options.nvm_type)
        nvm_ctrl = MemConfig.create_mem_ctrl(
            nvm_cls, AddrRange(0, total_size - dram_size - 1), 0, 1, 0,
            intlv_size)
        # Honour an explicit --mem-ranks setting for DRAM-style controllers.
        if issubclass(nvm_cls, DRAMCtrl) and options.mem_ranks:
            nvm_ctrl.ranks_per_channel = options.mem_ranks
        ctrls.append(nvm_ctrl)

    # DRAM occupies the top of the physical address space.
    if dram_size > 0:
        dram_cls = MemConfig.get(options.dram_type)
        dram_ctrl = MemConfig.create_mem_ctrl(
            dram_cls, AddrRange(total_size - dram_size, total_size - 1),
            0, 1, 0, intlv_size)
        if issubclass(dram_cls, DRAMCtrl) and options.mem_ranks:
            dram_ctrl.ranks_per_channel = options.mem_ranks
        ctrls.append(dram_ctrl)

    system.mem_ctrls = ctrls

    # Hook every controller up to the shared THNVM bus, then bridge that
    # bus onto the system memory bus.
    for ctrl in system.mem_ctrls:
        ctrl.port = system.thnvm_bus.master

    system.thnvm_bus.slave = system.membus.master
Beispiel #5
0
def config_hybrid_mem(options, system):
    """
    Assign proper address ranges for DRAM and NVM controllers.
    Create memory controllers and add their shared bus to the system.
    """
    def _build(mem_type, addr_range):
        # Build one single-channel controller for addr_range, honouring an
        # explicit --mem-ranks setting for DRAM-style controllers.
        cls = MemConfig.get(mem_type)
        ctrl = MemConfig.create_mem_ctrl(cls, addr_range, 0, 1, 0,
                                         intlv_size)
        if issubclass(cls, DRAMCtrl) and options.mem_ranks:
            ctrl.ranks_per_channel = options.mem_ranks
        return ctrl

    system.thnvm_bus = VirtualXBar()
    mem_ctrls = []

    # Channel interleaving on 128 bytes, or cache-line granularity when
    # the line is larger; based on locality across many workloads.
    intlv_size = max(128, system.cache_line_size.value)

    total_size = Addr(options.mem_size)
    dram_size = pow(2, options.page_bits) * options.ptt_length

    if dram_size < total_size.value:
        # NVM covers everything below the DRAM region.
        mem_ctrls.append(
            _build(options.nvm_type,
                   AddrRange(0, total_size - dram_size - 1)))

    if dram_size > 0:
        # DRAM sits at the top of physical memory.
        mem_ctrls.append(
            _build(options.dram_type,
                   AddrRange(total_size - dram_size, total_size - 1)))

    system.mem_ctrls = mem_ctrls

    # Attach every controller to the shared THNVM bus and bridge that bus
    # onto the system memory bus.
    for i in xrange(len(system.mem_ctrls)):
        system.mem_ctrls[i].port = system.thnvm_bus.master

    system.thnvm_bus.slave = system.membus.master
Beispiel #6
0
def addFSOptions(parser):
    """Register full-system (FS) simulation command-line options.

    Adds simulation, PIM, system, benchmark, ethernet-dump and disk-image
    options; ARM-specific options are registered only when the build
    targets ARM.
    """
    # Simulation options
    parser.add_option(
        "--timesync",
        action="store_true",
        help="Prevent simulated time from getting ahead of real time")

    # JIWON: pim options
    parser.add_option("--num-pim-sys", type="int", default=0)
    parser.add_option("--pim-mem-type",
                      type="choice",
                      default="HMCVault",
                      choices=MemConfig.mem_names())

    # System options
    parser.add_option("--kernel", action="store", type="string")
    parser.add_option("--script", action="store", type="string")
    parser.add_option("--frame-capture", action="store_true",
                      help="Stores changed frame buffers from the VNC server "
                           "to compressed files in the gem5 output directory")

    if buildEnv['TARGET_ISA'] == "arm":
        parser.add_option(
            "--bare-metal",
            action="store_true",
            help="Provide the raw system without the linux specific bits")
        parser.add_option("--machine-type",
                          action="store",
                          type="choice",
                          choices=ArmMachineType.map.keys(),
                          default="VExpress_EMM")
        parser.add_option("--dtb-filename", action="store", type="string",
                          help="Specifies device tree blob file to use with "
                               "device-tree-enabled kernels")
        parser.add_option("--enable-context-switch-stats-dump",
                          action="store_true",
                          help="Enable stats dump at context switches and "
                               "dump tasks file (required for Streamline)")

    # Benchmark options
    parser.add_option(
        "--dual",
        action="store_true",
        help="Simulate two systems attached with an ethernet link")
    parser.add_option("-b", "--benchmark", action="store", type="string",
                      dest="benchmark",
                      help="Specify the benchmark to run. Available "
                           "benchmarks: %s" % DefinedBenchmarks)

    # Metafile options
    parser.add_option("--etherdump", action="store", type="string",
                      dest="etherdump",
                      # FIX: the original string concatenation was missing a
                      # space ("...capture of theethernet traffic").
                      help="Specify the filename to dump a pcap capture of "
                           "the ethernet traffic")

    # Disk Image Options
    parser.add_option("--disk-image",
                      action="store",
                      type="string",
                      default=None,
                      help="Path to the disk image to use.")
Beispiel #7
0
def jiwon_config_pim( test_sys, options ):
    """Create PIM vault controllers and PIM core systems on test_sys.

    For each of options.num_pim_sys PIM instances this builds a vault
    memory controller, a PIM core system, p2s/s2p bridges, TLB range
    remappings and (optionally) CommMonitor probes, then wires everything
    into the host system.
    """
    (TestCPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options)

    # create PIM vault ctrls: one vault per PIM system, each covering a
    # contiguous DRAM-channel-sized physical address range.
    cls = MemConfig.get(options.pim_mem_type)
    pim_vault_ctrls = []
    for i in xrange(options.num_pim_sys):
        vault_range = AddrRange(PIM_VAULT_BASE_ADDR + i*DRAM_CHANNEL_SIZE_INT, size=DRAM_CHANNEL_SIZE_MB)
        pim_vault = ethz_create_mem_ctrl(cls, vault_range, i=0, nbr_mem_ctrls=1, intlv_bits=0, cache_line_size=test_sys.cache_line_size.value) 
        pim_vault_ctrls.append(pim_vault)

    test_sys.pim_vault_ctrls = pim_vault_ctrls

    # create PIM cores
    test_sys.pim_sys = [build_pim_system(options, test_mem_mode, TestCPUClass, pim_id=i) for i in xrange(options.num_pim_sys)]
    for i in xrange(options.num_pim_sys):
        # Bridges between the PIM-local bus and the rest of the system:
        # p2s carries PIM-to-system traffic, s2p the reverse direction.
        pim_device_range = AddrRange(PIM_ADDRESS_BASE + i*PIM_ADDRESS_SIZE_INT, size = PIM_ADDRESS_SIZE) #physical address range
        pim_vault_phys_addr_base = PIM_VAULT_BASE_ADDR + i*DRAM_CHANNEL_SIZE_INT
        pim_vault_range = AddrRange(pim_vault_phys_addr_base, size=DRAM_CHANNEL_SIZE_MB) #physical address range
        test_sys.pim_sys[i].p2s = Bridge(ranges=pim_vault_range, delay='0.01ns', req_size=32, resp_size=32)
        test_sys.pim_sys[i].s2p = Bridge(ranges=pim_device_range, delay='0.01ns', req_size=32, resp_size=32)

        # add vault address ranges to TLB: every PIM core sees its vault at
        # the same fixed virtual base, remapped to its own physical range in
        # the instruction, data and "s" TLBs.
        pim_vault_virt_addr_base = 0xC0000000  #JIWON: TODO-- need to change this to something more reasonable....
        test_sys.pim_sys[i].itlb.original_ranges.append(AddrRange(pim_vault_virt_addr_base, size=DRAM_CHANNEL_SIZE_MB))
        test_sys.pim_sys[i].itlb.remapped_ranges.append(AddrRange(pim_vault_phys_addr_base, size=DRAM_CHANNEL_SIZE_MB))
        test_sys.pim_sys[i].dtlb.original_ranges.append(AddrRange(pim_vault_virt_addr_base, size=DRAM_CHANNEL_SIZE_MB))
        test_sys.pim_sys[i].dtlb.remapped_ranges.append(AddrRange(pim_vault_phys_addr_base, size=DRAM_CHANNEL_SIZE_MB))
        test_sys.pim_sys[i].stlb.original_ranges.append(AddrRange(pim_vault_virt_addr_base, size=DRAM_CHANNEL_SIZE_MB))
        test_sys.pim_sys[i].stlb.remapped_ranges.append(AddrRange(pim_vault_phys_addr_base, size=DRAM_CHANNEL_SIZE_MB))

        # Optionally interpose a CommMonitor between the PIM bus and the
        # p2s bridge to observe (and optionally dump) outbound traffic.
        if ( GEM5_ENABLE_COMM_MONITORS == "TRUE" ):
            test_sys.pim_sys[i].Smon = CommMonitor()

            if ( SMON_DUMP_ADDRESS == "TRUE" ):
                test_sys.pim_sys[i].Smon.dump_addresses=True
                test_sys.pim_sys[i].Smon.dump_file="m5out/smon_addr_dump.txt"

            test_sys.pim_sys[i].pimbus.master = test_sys.pim_sys[i].Smon.slave
            test_sys.pim_sys[i].Smon.master = test_sys.pim_sys[i].p2s.slave
        else:
            test_sys.pim_sys[i].pimbus.master = test_sys.pim_sys[i].p2s.slave

        test_sys.pim_sys[i].s2p.master = test_sys.pim_sys[i].pimbus.slave
        if ( MOVE_PIM_TO_HOST == "FALSE" ):
            test_sys.smcxbar.master = test_sys.pim_sys[i].s2p.slave # connect PIM core to system
            test_sys.pim_vault_ctrls[i].port = test_sys.pim_sys[i].p2s.master # connect PIM vault to PIM core
        else:
            # PIM moved to the host side: route through the system membus.
            test_sys.pim_sys[i].p2s.master = test_sys.membus.slave
            test_sys.membus.master = test_sys.pim_sys[i].s2p.slave;
Beispiel #8
0
def ethz_config_mem(options, system):
    """
    Create the memory controllers based on the options and attach them.

    If requested, we make a multi-channel configuration of the
    selected memory controller class by creating multiple instances of
    the specific class. The individual controllers have their
    parameters set such that the address range is interleaved between
    them.
    """
    import math
    from m5.util import fatal

    channels = options.mem_channels
    intlv_bits = int(math.log(channels, 2))
    if 2**intlv_bits != channels:
        fatal("Number of memory channels must be a power of 2")

    mem_cls = MemConfig.get(options.mem_type)

    # One controller per (range, channel) pair; each controller is
    # parameterised so the address range interleaves across the channels.
    mem_ctrls = [
        ethz_create_mem_ctrl(mem_cls, rng, chan, channels, intlv_bits,
                             system.cache_line_size.value)
        for rng in system.mem_ranges
        for chan in xrange(channels)
    ]

    system.mem_ctrls = mem_ctrls

    # Connect every controller to the smcxbar.  # Modified by Erfan
    for ctrl in system.mem_ctrls:
        ctrl.port = system.smcxbar.master

    ethz_print_val("system.cache_line_size.value (bytes)",
                   system.cache_line_size.value)

    if options.mem_type != "ethz_ModelsimIF":
        ethz_print_val("number of vaults", channels)
Beispiel #9
0
def _listMemTypes(option, opt, value, parser):
    """optparse callback for --list-mem-types: print the registered
    memory controller types and terminate the process."""
    MemConfig.print_mem_list()
    sys.exit(0)
Beispiel #10
0
# Sanity check: --fastmem bypasses the memory system, so it is only valid
# with the atomic CPU model and without any cache hierarchy.
if options.fastmem:
    if TestCPUClass != AtomicSimpleCPU:
        fatal("Fastmem can only be used with atomic CPU!")
    if (options.caches or options.l2cache):
        fatal("You cannot use fastmem in combination with caches!")

# Per-CPU setup: fastmem flag, optional checker CPU, hardware threads.
for i in xrange(np):
    if options.fastmem:
        test_sys.cpu[i].fastmem = True
    if options.checker:
        test_sys.cpu[i].addCheckerCpu()
    test_sys.cpu[i].createThreads()

CacheConfig.config_cache(options, test_sys)
MemConfig.config_mem(options, test_sys)

# Dual-system run (len(bm) == 2): build the drive system for the target ISA.
# NOTE(review): this if-suite appears to continue beyond this chunk.
if len(bm) == 2:
    if buildEnv['TARGET_ISA'] == 'alpha':
        drive_sys = makeLinuxAlphaSystem(drive_mem_mode, bm[1])
    elif buildEnv['TARGET_ISA'] == 'mips':
        drive_sys = makeLinuxMipsSystem(drive_mem_mode, bm[1])
    elif buildEnv['TARGET_ISA'] == 'sparc':
        drive_sys = makeSparcSystem(drive_mem_mode, bm[1])
    elif buildEnv['TARGET_ISA'] == 'x86':
        drive_sys = makeLinuxX86System(drive_mem_mode, np, bm[1])
    elif buildEnv['TARGET_ISA'] == 'arm':
        drive_sys = makeArmSystem(drive_mem_mode, options.machine_type, bm[1])

    # Create a top-level voltage domain
    drive_sys.voltage_domain = VoltageDomain(voltage=options.sys_voltage)
Beispiel #11
0
    # NOTE(review): this fragment continues a Ruby-configuration branch whose
    # opening `if` is outside this chunk.
    assert (options.num_cpus == len(system.ruby._cpu_ports))

    system.ruby.clk_domain = SrcClockDomain(
        clock=options.ruby_clock, voltage_domain=system.voltage_domain)
    for i in xrange(np):
        ruby_port = system.ruby._cpu_ports[i]

        # Create the interrupt controller and connect its ports to Ruby
        # Note that the interrupt controller is always present but only
        # in x86 does it have message ports that need to be connected
        system.cpu[i].createInterruptController()

        # Connect the cpu's cache ports to Ruby
        system.cpu[i].icache_port = ruby_port.slave
        system.cpu[i].dcache_port = ruby_port.slave
        if buildEnv['TARGET_ISA'] == 'x86':
            # x86 interrupt message ports are routed through Ruby as well.
            system.cpu[i].interrupts.pio = ruby_port.master
            system.cpu[i].interrupts.int_master = ruby_port.slave
            system.cpu[i].interrupts.int_slave = ruby_port.master
            # Page-table walkers issue their own memory requests.
            system.cpu[i].itb.walker.port = ruby_port.slave
            system.cpu[i].dtb.walker.port = ruby_port.slave
else:
    # Classic (non-Ruby) memory system: a single system crossbar plus the
    # standard cache and memory configuration helpers.
    MemClass = Simulation.setMemClass(options)
    system.membus = SystemXBar()
    system.system_port = system.membus.slave
    CacheConfig.config_cache(options, system)
    MemConfig.config_mem(options, system)

root = Root(full_system=False, system=system)
Simulation.run(options, root, system, FutureClass)
Beispiel #12
0
def run_system_with_cpu(
        process, options, output_dir,
        warmup_cpu_class=None,
        warmup_instructions=0,
        real_cpu_create_function=lambda cpu_id: DerivO3CPU(cpu_id=cpu_id),
):
    """Build a single-CPU system for *process* and simulate it.

    If *warmup_cpu_class* is given, the first *warmup_instructions*
    instructions run on that CPU model; the run then switches to the
    "real" CPU produced by *real_cpu_create_function*, and statistics
    are reset so the warmup phase is excluded from the dumped stats.

    Parameters:
        process: gem5 workload (Process) assigned to every CPU.
        options: parsed command-line options (tick limits, clocks, sizes).
        output_dir: directory that receives gem5 output (stats etc.).
        warmup_cpu_class: optional CPU class for the warmup phase.
        warmup_instructions: instruction budget for the warmup CPU.
        real_cpu_create_function: factory producing the measured CPU.
    """
    # Route all gem5 output to the requested directory (overrides -d/--outdir).
    m5.options.outdir = output_dir
    m5.core.setOutputDir(m5.options.outdir)

    m5.stats.reset()

    # Tick-limit precedence: --rel-max-tick wins over --abs-max-tick;
    # --maxtime (seconds) is honoured only when rel-max-tick is unset.
    max_tick = options.abs_max_tick
    if options.rel_max_tick:
        max_tick = options.rel_max_tick
    elif options.maxtime:
        # Seconds -> ticks, assuming the default 1 ps tick resolution.
        max_tick = int(options.maxtime * 1000 * 1000 * 1000 * 1000)

    eprint("Simulating until tick=%s" % (max_tick))

    real_cpus = [real_cpu_create_function(0)]
    mem_mode = real_cpus[0].memory_mode()

    if warmup_cpu_class:
        warmup_cpus = [warmup_cpu_class(cpu_id=0)]
        warmup_cpus[0].max_insts_any_thread = warmup_instructions
    else:
        # No warmup requested: the real CPU runs from tick zero.
        warmup_cpus = real_cpus

    system = System(cpu = warmup_cpus,
                    mem_mode = mem_mode,
                    mem_ranges = [AddrRange(options.mem_size)],
                    cache_line_size = options.cacheline_size)
    system.multi_thread = False
    system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
    system.clk_domain = SrcClockDomain(clock = options.sys_clock,
                                       voltage_domain = system.voltage_domain)
    system.cpu_voltage_domain = VoltageDomain()
    system.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock,
                                           voltage_domain =
                                           system.cpu_voltage_domain)
    system.cache_line_size = options.cacheline_size
    if warmup_cpu_class:
        # Pre-configure the switched-out real CPUs so m5.switchCpus() can
        # swap them in after the warmup phase.
        for cpu in real_cpus:
            cpu.clk_domain = system.cpu_clk_domain
            cpu.workload = process
            cpu.system = system
            cpu.switched_out = True
            cpu.createThreads()
        system.switch_cpus = real_cpus

    for cpu in system.cpu:
        cpu.clk_domain = system.cpu_clk_domain
        cpu.workload = process
        if options.prog_interval:
            cpu.progress_interval = options.prog_interval
        # FIX: dropped the stray trailing semicolon here.
        cpu.createThreads()

    MemClass = Simulation.setMemClass(options)
    system.membus = SystemXBar()
    system.system_port = system.membus.slave
    system.cpu[0].connectAllPorts(system.membus)
    MemConfig.config_mem(options, system)
    root = Root(full_system = False, system = system)

    # (redundant second assignment of m5.options.outdir removed; it was
    # already set at the top of this function and never changed since)
    m5.instantiate(None) # None == no checkpoint
    if warmup_cpu_class:
        eprint("Running warmup with warmup CPU class (%d instrs.)"
               % (warmup_instructions))
    eprint("Starting simulation")
    exit_event = m5.simulate(max_tick)
    if warmup_cpu_class:
        # Budget the remaining ticks for the measured phase and discard the
        # warmup statistics before switching to the real CPUs.
        max_tick -= m5.curTick()
        m5.stats.reset()
        debug_print("Finished warmup; running real simulation")
        m5.switchCpus(system, real_cpus)
        exit_event = m5.simulate(max_tick)
    eprint("Done simulation @ tick = %s: %s"
           % (m5.curTick(), exit_event.getCause()))
    m5.stats.dump()
Beispiel #13
0
# I/D Cache configuration
# Give every CPU a private 32kB L1 instruction and data cache, both
# connected directly to the shared memory bus (no L2 level here).
for i in xrange(np):
    sys0.cpu[i].icache = L1Cache(size='32kB')
    sys0.cpu[i].icache.cpu_side = sys0.cpu[i].icache_port
    sys0.cpu[i].icache.mem_side = sys0.membus.slave

    sys0.cpu[i].dcache = L1Cache(size='32kB')
    sys0.cpu[i].dcache.cpu_side = sys0.cpu[i].dcache_port
    sys0.cpu[i].dcache.mem_side = sys0.membus.slave

# Cache for IO/DMA traffic flowing from the iobus into the memory bus.
sys0.iocache = IOCache(addr_ranges=sys0.mem_ranges)
sys0.iocache.cpu_side = sys0.iobus.master
sys0.iocache.mem_side = sys0.membus.slave

MemConfig.config_mem(options, sys0)

# Create a top-level voltage domain
sys0.voltage_domain = VoltageDomain(voltage=options.sys_voltage)

# Create a source clock for the system and set the clock period
sys0.clk_domain = SrcClockDomain(clock=options.sys_clock,
                                 voltage_domain=sys0.voltage_domain)

# Hierarchy configuration
root = Root(full_system=True)
root.trace_system = sys0

#-------------------------------------------
#-- Run simulation.
#-------------------------------------------
Beispiel #14
0
from m5.internal.stats import periodicStatDump

addToPath('../common')

import MemConfig

# this script is helpful to sweep the efficiency of a specific memory
# controller configuration, by varying the number of banks accessed,
# and the sequential stride size (how many bytes per activate), and
# observe what bus utilisation (bandwidth) is achieved

# NOTE(review): `optparse` is imported outside this chunk.
parser = optparse.OptionParser()

# Use a single-channel DDR3-1600 x64 by default
parser.add_option("--mem-type", type="choice", default="ddr3_1600_x64",
                  choices=MemConfig.mem_names(),
                  help = "type of memory to use")

parser.add_option("--ranks", "-r", type="int", default=1,
                  help = "Number of ranks to iterate across")

parser.add_option("--rd_perc", type="int", default=100,
                  help = "Percentage of read commands")

parser.add_option("--mode", type="choice", default="DRAM",
                  choices=["DRAM", "DRAM_ROTATE"],
                  help = "DRAM: Random traffic; \
                          DRAM_ROTATE: Traffic rotating across banks and ranks")

parser.add_option("--addr_map", type="int", default=1,
                  help = "0: RoCoRaBaCh; 1: RoRaBaCoCh/RoRaBaChCo")
Beispiel #15
0
def addCommonOptions(parser):
    # system options
    parser.add_option("--extra", type="int", default=0)
    parser.add_option("--instructionQueue-instruction-flag",
                      type="int",
                      default=0)
    parser.add_option("--instructionQueue-instruction-faultType",
                      type="int",
                      default=0)
    parser.add_option("--instructionQueue-instruction-faultRate",
                      type="float",
                      default=0.0)
    parser.add_option("--reorderBuffer-instruction-flag",
                      type="int",
                      default=0)
    parser.add_option("--reorderBuffer-instruction-faultType",
                      type="int",
                      default=0)
    parser.add_option("--reorderBuffer-instruction-faultRate",
                      type="float",
                      default=0.0)
    parser.add_option("--register-integer-flag", type="int", default=0)
    parser.add_option("--register-integer-faultType", type="int", default=0)
    parser.add_option("--register-integer-faultRate",
                      type="float",
                      default=0.0)
    parser.add_option("--register-floatingPoint-flag", type="int", default=0)
    parser.add_option("--register-floatingPoint-faultType",
                      type="int",
                      default=0)
    parser.add_option("--register-floatingPoint-faultRate",
                      type="float",
                      default=0.0)
    parser.add_option("--cache-tag-flag", type="int", default=0)
    parser.add_option("--cache-tag-faultType", type="int", default=0)
    parser.add_option("--cache-tag-faultRate", type="float", default=0.0)
    parser.add_option("--cache-state-flag", type="int", default=0)
    parser.add_option("--cache-state-faultType", type="int", default=0)
    parser.add_option("--cache-state-faultRate", type="float", default=0.0)
    parser.add_option("--cache-data-flag", type="int", default=0)
    parser.add_option("--cache-data-faultType", type="int", default=0)
    parser.add_option("--cache-data-faultRate", type="float", default=0.0)
    parser.add_option("--tlb-tag-flag", type="int", default=0)
    parser.add_option("--tlb-tag-faultType", type="int", default=0)
    parser.add_option("--tlb-tag-faultRate", type="float", default=0.0)
    parser.add_option("--tlb-state-flag", type="int", default=0)
    parser.add_option("--tlb-state-faultType", type="int", default=0)
    parser.add_option("--tlb-state-faultRate", type="float", default=0.0)
    parser.add_option("--tlb-data-flag", type="int", default=0)
    parser.add_option("--tlb-data-faultType", type="int", default=0)
    parser.add_option("--tlb-data-faultRate", type="float", default=0.0)
    parser.add_option("--encodingType",
                      type="choice",
                      default="none",
                      choices=[
                          'hamming', 'berger', 'cyclic', 'single_check',
                          'double_check', 'none'
                      ])
    parser.add_option("--encodingHidden", type="int", default=0)

    parser.add_option("--list-cpu-types",
                      action="callback",
                      callback=_listCpuTypes,
                      help="List available CPU types")
    parser.add_option("--test-Flag", type="int", default=0)
    parser.add_option("--cpu-type",
                      type="choice",
                      default="atomic",
                      choices=CpuConfig.cpu_names(),
                      help="type of cpu to run with")
    parser.add_option("--checker", action="store_true")
    parser.add_option("-n", "--num-cpus", type="int", default=1)
    parser.add_option("--sys-voltage",
                      action="store",
                      type="string",
                      default='1.0V',
                      help="""Top-level voltage for blocks running at system
                      power supply""")
    parser.add_option("--sys-clock",
                      action="store",
                      type="string",
                      default='1GHz',
                      help="""Top-level clock for blocks running at system
                      speed""")
    parser.add_option("--cpu-clock",
                      action="store",
                      type="string",
                      default='2GHz',
                      help="Clock for blocks running at CPU speed")
    parser.add_option("--smt",
                      action="store_true",
                      default=False,
                      help="""
                      Only used if multiple programs are specified. If true,
                      then the number of threads per cpu is same as the
                      number of programs.""")
    parser.add_option("--elastic-trace-en",
                      action="store_true",
                      help="""Enable capture of data dependency and instruction
                      fetch traces using elastic trace probe.""")
    # Trace file paths input to trace probe in a capture simulation and input
    # to Trace CPU in a replay simulation
    parser.add_option("--inst-trace-file",
                      action="store",
                      type="string",
                      help="""Instruction fetch trace file input to
                      Elastic Trace probe in a capture simulation and
                      Trace CPU in a replay simulation""",
                      default="")
    parser.add_option("--data-trace-file",
                      action="store",
                      type="string",
                      help="""Data dependency trace file input to
                      Elastic Trace probe in a capture simulation and
                      Trace CPU in a replay simulation""",
                      default="")

    # Memory Options
    parser.add_option("--list-mem-types",
                      action="callback",
                      callback=_listMemTypes,
                      help="List available memory types")
    parser.add_option("--mem-type",
                      type="choice",
                      default="DDR3_1600_x64",
                      choices=MemConfig.mem_names(),
                      help="type of memory to use")
    parser.add_option("--mem-channels",
                      type="int",
                      default=1,
                      help="number of memory channels")
    parser.add_option("--mem-ranks",
                      type="int",
                      default=None,
                      help="number of memory ranks per channel")
    parser.add_option("--mem-size",
                      action="store",
                      type="string",
                      default="512MB",
                      help="Specify the physical memory size (single memory)")

    parser.add_option("-l", "--lpae", action="store_true")
    parser.add_option("-V", "--virtualisation", action="store_true")

    parser.add_option("--memchecker", action="store_true")

    # Cache Options
    parser.add_option("--external-memory-system",
                      type="string",
                      help="use external ports of this port_type for caches")
    parser.add_option("--tlm-memory",
                      type="string",
                      help="use external port for SystemC TLM cosimulation")
    parser.add_option("--caches", action="store_true")
    parser.add_option("--l2cache", action="store_true")
    parser.add_option("--fastmem", action="store_true")
    parser.add_option("--num-dirs", type="int", default=1)
    parser.add_option("--num-l2caches", type="int", default=1)
    parser.add_option("--num-l3caches", type="int", default=1)
    parser.add_option("--l1d_size", type="string", default="64kB")
    parser.add_option("--l1i_size", type="string", default="32kB")
    parser.add_option("--l2_size", type="string", default="2MB")
    parser.add_option("--l3_size", type="string", default="16MB")
    parser.add_option("--l1d_assoc", type="int", default=2)
    parser.add_option("--l1i_assoc", type="int", default=2)
    parser.add_option("--l2_assoc", type="int", default=8)
    parser.add_option("--l3_assoc", type="int", default=16)
    parser.add_option("--cacheline_size", type="int", default=64)

    # dist-gem5 options
    parser.add_option("--dist",
                      action="store_true",
                      help="Parallel distributed gem5 simulation.")
    parser.add_option("--is-switch", action="store_true",
                      help="Select the network switch simulator process for a"\
                      "distributed gem5 run")
    parser.add_option("--dist-rank",
                      default=0,
                      action="store",
                      type="int",
                      help="Rank of this system within the dist gem5 run.")
    parser.add_option(
        "--dist-size",
        default=0,
        action="store",
        type="int",
        help="Number of gem5 processes within the dist gem5 run.")
    parser.add_option(
        "--dist-server-name",
        default="127.0.0.1",
        action="store",
        type="string",
        help="Name of the message server host\nDEFAULT: localhost")
    parser.add_option("--dist-server-port",
                      default=2200,
                      action="store",
                      type="int",
                      help="Message server listen port\nDEFAULT: 2200")
    parser.add_option(
        "--dist-sync-repeat",
        default="0us",
        action="store",
        type="string",
        help=
        "Repeat interval for synchronisation barriers among dist-gem5 processes\nDEFAULT: --ethernet-linkdelay"
    )
    parser.add_option(
        "--dist-sync-start",
        default="5200000000000t",
        action="store",
        type="string",
        help=
        "Time to schedule the first dist synchronisation barrier\nDEFAULT:5200000000000t"
    )
    parser.add_option("--ethernet-linkspeed",
                      default="10Gbps",
                      action="store",
                      type="string",
                      help="Link speed in bps\nDEFAULT: 10Gbps")
    parser.add_option("--ethernet-linkdelay",
                      default="10us",
                      action="store",
                      type="string",
                      help="Link delay in seconds\nDEFAULT: 10us")

    # Enable Ruby
    parser.add_option("--ruby", action="store_true")

    # Run duration options
    parser.add_option("-m", "--abs-max-tick", type="int", default=m5.MaxTick,
                      metavar="TICKS", help="Run to absolute simulated tick " \
                      "specified including ticks from a restored checkpoint")
    parser.add_option("--rel-max-tick", type="int", default=None,
                      metavar="TICKS", help="Simulate for specified number of" \
                      " ticks relative to the simulation start tick (e.g. if " \
                      "restoring a checkpoint)")
    parser.add_option("--maxtime", type="float", default=None,
                      help="Run to the specified absolute simulated time in " \
                      "seconds")
    parser.add_option("-I",
                      "--maxinsts",
                      action="store",
                      type="int",
                      default=None,
                      help="""Total number of instructions to
                                            simulate (default: run forever)""")
    parser.add_option("--work-item-id",
                      action="store",
                      type="int",
                      help="the specific work id for exit & checkpointing")
    parser.add_option("--num-work-ids",
                      action="store",
                      type="int",
                      help="Number of distinct work item types")
    parser.add_option("--work-begin-cpu-id-exit",
                      action="store",
                      type="int",
                      help="exit when work starts on the specified cpu")
    parser.add_option("--work-end-exit-count",
                      action="store",
                      type="int",
                      help="exit at specified work end count")
    parser.add_option("--work-begin-exit-count",
                      action="store",
                      type="int",
                      help="exit at specified work begin count")
    parser.add_option("--init-param",
                      action="store",
                      type="int",
                      default=0,
                      help="""Parameter available in simulation with m5
                              initparam""")
    parser.add_option("--initialize-only",
                      action="store_true",
                      default=False,
                      help="""Exit after initialization. Do not simulate time.
                              Useful when gem5 is run as a library.""")

    # Simpoint options
    parser.add_option("--simpoint-profile",
                      action="store_true",
                      help="Enable basic block profiling for SimPoints")
    parser.add_option("--simpoint-interval",
                      type="int",
                      default=10000000,
                      help="SimPoint interval in num of instructions")
    parser.add_option(
        "--take-simpoint-checkpoints",
        action="store",
        type="string",
        help="<simpoint file,weight file,interval-length,warmup-length>")
    parser.add_option("--restore-simpoint-checkpoint",
                      action="store_true",
                      help="restore from a simpoint checkpoint taken with " +
                      "--take-simpoint-checkpoints")

    # Checkpointing options
    ###Note that performing checkpointing via python script files will override
    ###checkpoint instructions built into binaries.
    parser.add_option(
        "--take-checkpoints",
        action="store",
        type="string",
        help="<M,N> take checkpoints at tick M and every N ticks thereafter")
    parser.add_option("--max-checkpoints",
                      action="store",
                      type="int",
                      help="the maximum number of checkpoints to drop",
                      default=5)
    parser.add_option("--checkpoint-dir",
                      action="store",
                      type="string",
                      help="Place all checkpoints in this absolute directory")
    parser.add_option("-r",
                      "--checkpoint-restore",
                      action="store",
                      type="int",
                      help="restore from checkpoint <N>")
    parser.add_option("--checkpoint-at-end",
                      action="store_true",
                      help="take a checkpoint at end of run")
    parser.add_option("--work-begin-checkpoint-count",
                      action="store",
                      type="int",
                      help="checkpoint at specified work begin count")
    parser.add_option("--work-end-checkpoint-count",
                      action="store",
                      type="int",
                      help="checkpoint at specified work end count")
    parser.add_option(
        "--work-cpus-checkpoint-count",
        action="store",
        type="int",
        help="checkpoint and exit when active cpu count is reached")
    parser.add_option("--restore-with-cpu",
                      action="store",
                      type="choice",
                      default="atomic",
                      choices=CpuConfig.cpu_names(),
                      help="cpu type for restoring from a checkpoint")

    # CPU Switching - default switch model goes from a checkpoint
    # to a timing simple CPU with caches to warm up, then to detailed CPU for
    # data measurement
    parser.add_option(
        "--repeat-switch",
        action="store",
        type="int",
        default=None,
        help="switch back and forth between CPUs with period <N>")
    parser.add_option(
        "-s",
        "--standard-switch",
        action="store",
        type="int",
        default=None,
        help="switch from timing to Detailed CPU after warmup period of <N>")
    parser.add_option("-p",
                      "--prog-interval",
                      type="str",
                      help="CPU Progress Interval")

    # Fastforwarding and simpoint related materials
    parser.add_option(
        "-W",
        "--warmup-insts",
        action="store",
        type="int",
        default=None,
        help="Warmup period in total instructions (requires --standard-switch)"
    )
    parser.add_option(
        "--bench",
        action="store",
        type="string",
        default=None,
        help="base names for --take-checkpoint and --checkpoint-restore")
    parser.add_option(
        "-F",
        "--fast-forward",
        action="store",
        type="string",
        default=None,
        help="Number of instructions to fast forward before switching")
    parser.add_option(
        "-S",
        "--simpoint",
        action="store_true",
        default=False,
        help="""Use workload simpoints as an instruction offset for
                --checkpoint-restore or --take-checkpoint.""")
    parser.add_option(
        "--at-instruction",
        action="store_true",
        default=False,
        help="""Treat value of --checkpoint-restore or --take-checkpoint as a
                number of instructions.""")
    parser.add_option(
        "--spec-input",
        default="ref",
        type="choice",
        choices=["ref", "test", "train", "smred", "mdred", "lgred"],
        help="Input set size for SPEC CPU2000 benchmarks.")
    parser.add_option("--arm-iset",
                      default="arm",
                      type="choice",
                      choices=["arm", "thumb", "aarch64"],
                      help="ARM instruction set.")
Beispiel #16
0
# Make the in-tree common config modules importable; assumes gem5 is
# launched from the repository root — otherwise the path will not
# resolve.  TODO confirm the working-directory assumption.
addToPath(os.getcwd() + '/configs/common')
import MemConfig

# This script aims at triggering low power state transitions in the DRAM
# controller. The traffic generator is used in DRAM mode and traffic
# states target a different levels of bank utilization and strides.
# At the end after sweeping through bank utilization and strides, we go
# through an idle state with no requests to enforce self-refresh.

# Show each option's default value in the --help output.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

# Use a single-channel DDR4-2400 in 16x4 configuration by default
parser.add_argument("--mem-type",
                    default="DDR4_2400_16x4",
                    choices=MemConfig.mem_names(),
                    help="type of memory to use")

parser.add_argument("--mem-ranks",
                    "-r",
                    type=int,
                    default=1,
                    help="Number of ranks to iterate across")

parser.add_argument("--page-policy",
                    "-p",
                    choices=["close_adaptive", "open_adaptive"],
                    default="close_adaptive",
                    help="controller page policy")
parser.add_argument("--itt-list", "-t", default="1 20 100",
Beispiel #17
0
def getOptions():
    """Parse the command-line options for this simulation script.

    Builds an optparse parser covering CPU/memory selection, GDB
    attachment, clocking and run-length options, adds the common
    full-system options, and returns the parsed options object.
    Exits via sys.exit() for the --list-* informational options or
    when unexpected positional arguments are supplied.
    """
    # Callbacks for the --list-* options: print the table and exit
    # immediately without running a simulation.
    def _listCpuTypes(option, opt, value, parser):
        CpuConfig.print_cpu_list()
        sys.exit(0)

    def _listMemTypes(option, opt, value, parser):
        MemConfig.print_mem_list()
        sys.exit(0)

    parser = optparse.OptionParser()

    parser.add_option("--list-cpu-types",
                      action="callback",
                      callback=_listCpuTypes,
                      help="List available CPU types")
    parser.add_option("--cpu-type",
                      type="choice",
                      default="atomic",
                      choices=CpuConfig.cpu_names(),
                      help="type of cpu to run with")

    parser.add_option("-c",
                      "--cmd",
                      default="",
                      type="string",
                      help="comma separated list of binaries")

    parser.add_option("--list-mem-types",
                      action="callback",
                      callback=_listMemTypes,
                      help="List available memory types")
    parser.add_option("--mem-type",
                      type="choice",
                      default="DDR4_2400_x64",
                      choices=MemConfig.mem_names(),
                      help="type of memory to use")
    parser.add_option("--mem-channels",
                      type="int",
                      default=1,
                      help="number of memory channels")
    parser.add_option("--mem-ranks",
                      type="int",
                      default=None,
                      help="number of memory ranks per channel")

    # -1 disables the pause-for-GDB behaviour.
    parser.add_option("--pausepe",
                      default=-1,
                      type="int",
                      help="the PE to pause until GDB connects")
    parser.add_option(
        "--remote-gdb-port",
        type='int',
        default=7000,
        help="Remote gdb base port (set to 0 to disable listening)")
    parser.add_option("--debug-start",
                      metavar="TIME",
                      type='int',
                      help="Start debug output at TIME (must be in ticks)")

    parser.add_option("--sys-voltage",
                      action="store",
                      type="string",
                      default='1.0V',
                      help="""Top-level voltage for blocks running at system
                      power supply""")
    parser.add_option("--sys-clock",
                      action="store",
                      type="string",
                      default='1GHz',
                      help="""Top-level clock for blocks running at system
                      speed""")
    parser.add_option("--cpu-clock",
                      action="store",
                      type="string",
                      default='2GHz',
                      help="Clock for blocks running at CPU speed")

    parser.add_option("-m",
                      "--maxtick",
                      type="int",
                      default=m5.MaxTick,
                      metavar="T",
                      help="Stop after T ticks")

    # Pull in the options shared by all full-system scripts.
    Options.addFSOptions(parser)

    (options, args) = parser.parse_args()

    # This script takes no positional arguments; reject any that were
    # given.  (print with parentheses behaves identically under Python 2
    # for a single argument and is Python-3 compatible.)
    if args:
        print("Error: script doesn't take any positional arguments")
        sys.exit(1)

    return options
Beispiel #18
0
def build_test_system(np):
    """Construct and return the full-system simulation target with *np* CPUs.

    Builds the ISA-specific Linux system (alpha/mips/sparc/x86/arm),
    sets up system and CPU voltage/clock domains, instantiates the CPUs
    and a NoMali GPU model, and wires up either a Ruby memory system or
    the classic cache/memory hierarchy depending on ``options.ruby``.
    Relies on module-level state (``options``, ``bm``, ``test_mem_mode``,
    ``TestCPUClass``, ``FutureClass``) set up by the surrounding script.
    """
    cmdline = cmd_line_template()
    # Pick the ISA-specific system constructor; any other ISA is fatal.
    if buildEnv['TARGET_ISA'] == "alpha":
        test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0], options.ruby,
                                        cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "mips":
        test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0], cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "sparc":
        test_sys = makeSparcSystem(test_mem_mode, bm[0], cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "x86":
        test_sys = makeLinuxX86System(test_mem_mode, options.num_cpus, bm[0],
                options.ruby, cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "arm":
        test_sys = makeArmSystem(test_mem_mode, options.machine_type,
                                 options.num_cpus, bm[0], options.dtb_filename,
                                 bare_metal=options.bare_metal,
                                 cmdline=cmdline,
                                 external_memory=options.external_memory_system)
        if options.enable_context_switch_stats_dump:
            test_sys.enable_context_switch_stats_dump = True
    else:
        fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA'])

    # Set the cache line size for the entire system
    test_sys.cache_line_size = options.cacheline_size

    # Create a top-level voltage domain
    test_sys.voltage_domain = VoltageDomain(voltage = options.sys_voltage)

    # Create a source clock for the system and set the clock period
    test_sys.clk_domain = SrcClockDomain(clock =  options.sys_clock,
            voltage_domain = test_sys.voltage_domain)

    # Create a CPU voltage domain
    test_sys.cpu_voltage_domain = VoltageDomain()

    # Create a source clock for the CPUs and set the clock period
    test_sys.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock,
                                             voltage_domain =
                                             test_sys.cpu_voltage_domain)

    if options.kernel is not None:
        test_sys.kernel = binary(options.kernel)

    # The readfile script (if any) is made available to the guest.
    # The prints below are debug traces left in by a previous author.
    if options.script is not None:
        test_sys.readfile = options.script
        print "fs.py 131#: {}".format(test_sys.readfile)
    else:
        print "fs.py 133#: options.script is None."

    if options.lpae:
        test_sys.have_lpae = True

    if options.virtualisation:
        test_sys.have_virtualization = True

    test_sys.init_param = options.init_param

    # For now, assign all the CPUs to the same clock domain
    test_sys.cpu = [TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i)
                    for i in xrange(np)]

    # KVM-based CPU models require a VM object on the system.
    if is_kvm_cpu(TestCPUClass) or is_kvm_cpu(FutureClass):
        test_sys.vm = KvmVM()

    # NoMali GPU model attached to the memory bus; the interrupt numbers
    # and PIO address are platform-specific constants -- TODO confirm
    # they match the target ARM platform.
    test_sys.gpu = NoMaliGpu(
        gpu_type="T760",
        ver_maj=0, ver_min=0, ver_status=1,
        int_job=118, int_mmu=119, int_gpu=120,
        pio_addr=0x2b400000,
        pio=test_sys.membus.master)

    if options.ruby:
        # Check for timing mode because ruby does not support atomic accesses
        if not (options.cpu_type == "detailed" or options.cpu_type == "timing"):
            print >> sys.stderr, "Ruby requires TimingSimpleCPU or O3CPU!!"
            sys.exit(1)

        Ruby.create_system(options, True, test_sys, test_sys.iobus,
                           test_sys._dma_ports)

        # Create a seperate clock domain for Ruby
        test_sys.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
                                        voltage_domain = test_sys.voltage_domain)

        # Connect the ruby io port to the PIO bus,
        # assuming that there is just one such port.
        test_sys.iobus.master = test_sys.ruby._io_port.slave

        for (i, cpu) in enumerate(test_sys.cpu):
            #
            # Tie the cpu ports to the correct ruby system ports
            #
            cpu.clk_domain = test_sys.cpu_clk_domain
            cpu.createThreads()
            cpu.createInterruptController()

            cpu.icache_port = test_sys.ruby._cpu_ports[i].slave
            cpu.dcache_port = test_sys.ruby._cpu_ports[i].slave

            # x86 additionally routes the page-table walkers and the
            # interrupt controller through the Ruby ports.
            if buildEnv['TARGET_ISA'] == "x86":
                cpu.itb.walker.port = test_sys.ruby._cpu_ports[i].slave
                cpu.dtb.walker.port = test_sys.ruby._cpu_ports[i].slave

                cpu.interrupts[0].pio = test_sys.ruby._cpu_ports[i].master
                cpu.interrupts[0].int_master = test_sys.ruby._cpu_ports[i].slave
                cpu.interrupts[0].int_slave = test_sys.ruby._cpu_ports[i].master

    else:
        if options.caches or options.l2cache:
            # By default the IOCache runs at the system clock
            test_sys.iocache = IOCache(addr_ranges = test_sys.mem_ranges)
            test_sys.iocache.cpu_side = test_sys.iobus.master
            test_sys.iocache.mem_side = test_sys.membus.slave
        elif not options.external_memory_system:
            # Without caches, a plain bridge lets IO/DMA traffic reach
            # the memory bus.
            test_sys.iobridge = Bridge(delay='50ns', ranges = test_sys.mem_ranges)
            test_sys.iobridge.slave = test_sys.iobus.master
            test_sys.iobridge.master = test_sys.membus.slave

        # Sanity check
        if options.fastmem:
            if TestCPUClass != AtomicSimpleCPU:
                fatal("Fastmem can only be used with atomic CPU!")
            if (options.caches or options.l2cache):
                fatal("You cannot use fastmem in combination with caches!")

        if options.simpoint_profile:
            if not options.fastmem:
                # Atomic CPU checked with fastmem option already
                fatal("SimPoint generation should be done with atomic cpu and fastmem")
            if np > 1:
                fatal("SimPoint generation not supported with more than one CPUs")

        for i in xrange(np):
            if options.fastmem:
                test_sys.cpu[i].fastmem = True
            if options.simpoint_profile:
                test_sys.cpu[i].addSimPointProbe(options.simpoint_interval)
            if options.checker:
                test_sys.cpu[i].addCheckerCpu()
            test_sys.cpu[i].createThreads()

        # If elastic tracing is enabled when not restoring from checkpoint and
        # when not fast forwarding using the atomic cpu, then check that the
        # TestCPUClass is DerivO3CPU or inherits from DerivO3CPU. If the check
        # passes then attach the elastic trace probe.
        # If restoring from checkpoint or fast forwarding, the code that does this for
        # FutureCPUClass is in the Simulation module. If the check passes then the
        # elastic trace probe is attached to the switch CPUs.
        if options.elastic_trace_en and options.checkpoint_restore == None and \
            not options.fast_forward:
            CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, options)

        CacheConfig.config_cache(options, test_sys)

        MemConfig.config_mem(options, test_sys)

    return test_sys
SimpleOpts.add_option("--l2_size",
                      type="string",
                      default="2MB",
                      help="Total size of L2 cache")
SimpleOpts.add_option("--l2_assoc",
                      type="int",
                      default=8,
                      # Fixed help text: this option sets the L2 (not the
                      # L1-D) associativity.
                      help="Associativity of L2 cache")
SimpleOpts.add_option("--num_dirs",
                      type="int",
                      default=1,
                      help="Number of directories")
SimpleOpts.add_option("--mem_type",
                      type="choice",
                      default="DDR3_1600_8x8",
                      choices=MemConfig.mem_names(),
                      help="Type of memory to use")
SimpleOpts.add_option("--l3_size",
                      default='4MB',
                      help="L3 cache size. Default: 4MB")
SimpleOpts.add_option("--l3_banks",
                      default=4,
                      type='int',
                      help="L3 cache banks. Default: 4")
SimpleOpts.add_option("--no_prefetchers",
                      default=False,
                      action="store_true",
                      # Fixed help text: passing this flag *disables* the
                      # prefetchers (store_true on a "no_" option).
                      help="Disable prefetchers on the caches")

# Ruby options
# check configs/ruby/Ruby.py and related files
Beispiel #20
0
# type="int" added: without it optparse leaves command-line values as
# strings while the defaults are ints, so e.g. "-l 50" would yield the
# string "50" instead of the integer 50.
parser.add_option("-l", "--checks", type="int", metavar="N", default=100,
                  help="Stop after N checks (loads)")
parser.add_option("-f", "--wakeup_freq", type="int", metavar="N", default=10,
                  help="Wakeup every N cycles")

#
# Add the ruby specific and protocol specific options
#
Ruby.define_options(parser)

#execfile(os.path.join(config_root, "common", "Options.py"))

(options, args) = parser.parse_args()

gpgpusimconfig = MemConfig.parseGpgpusimConfig(options)

#
# Set the default cache size and associativity to be very small to encourage
# races between requests and writebacks.
#
options.l1d_size="256B"
options.l1i_size="256B"
options.l2_size="512B"
options.l3_size="1kB"
options.l1d_assoc=2
options.l1i_assoc=2
options.l2_assoc=2
options.l3_assoc=2
if args:
Beispiel #21
0
def build_test_system(np):
    """Construct the full-system target with per-core DVFS clock domains.

    Variant of build_test_system that assigns each CPU its own
    SrcClockDomain drawn from a list of Haswell-like P-states and
    registers all four domains with the DVFS handler so clock frequency
    can be changed at runtime.

    NOTE(review): the CPU list below is hard-coded to exactly four
    cores, so ``np`` only affects the classic-memory configuration loop
    near the end -- confirm callers always pass np == 4.
    """
    cmdline = cmd_line_template()
    # Pick the ISA-specific system constructor; any other ISA is fatal.
    if buildEnv['TARGET_ISA'] == "alpha":
        test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0], options.ruby,
                                        cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "mips":
        test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0], cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "sparc":
        test_sys = makeSparcSystem(test_mem_mode, bm[0], cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "x86":
        test_sys = makeLinuxX86System(test_mem_mode, options.num_cpus, bm[0],
                options.ruby, options, cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "arm":
        test_sys = makeArmSystem(test_mem_mode, options.machine_type,
                                 options.num_cpus, bm[0], options.dtb_filename,
                                 options,
                                 bare_metal=options.bare_metal,
                                 cmdline=cmdline)
        if options.enable_context_switch_stats_dump:
            test_sys.enable_context_switch_stats_dump = True
    else:
        fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA'])

    # Set the cache line size for the entire system
    test_sys.cache_line_size = options.cacheline_size

    # Create a top-level voltage domain
    test_sys.voltage_domain = VoltageDomain(voltage = options.sys_voltage)

    # Create a source clock for the system and set the clock period
    test_sys.clk_domain = SrcClockDomain(clock =  options.sys_clock,
            voltage_domain = test_sys.voltage_domain)

    # Create a clk running contantly at 3GHz for L2
    test_sys.clk_domain_const = SrcClockDomain(clock =  ["3GHz"],
            voltage_domain = test_sys.voltage_domain)

    # Create a CPU voltage domain
    #test_sys.cpu_voltage_domain = VoltageDomain(voltage = ['1V','0.9V','0.8V'])
    test_sys.cpu_voltage_domain = VoltageDomain() # lokeshjindal15

    # Create a source clock for the CPUs and set the clock period
    # vailable frequency steps: 3.10 GHz, 3.10 GHz, 2.90 GHz, 2.80 GHz, 2.60 GHz, 2.40 GHz, 2.30 GHz, 2.10 GHz, 1.90 GHz, 1.80 GHz, 1.60 GHz, 1.50 GHz, 1.30 GHz, 1.10 GHz, 1000 MHz, 800 MHz
    haswell_pstates = ["3.10GHz", "2.90GHz", "2.80GHz", "2.60GHz", "2.40GHz", "2.30GHz", "2.10GHz", "1.90GHz", "1.80GHz", "1.60GHz", "1.50GHz", "1.30GHz", "1.10GHz", "1000MHz",     "800MHz"]
    # One clock domain per core, each offering the full P-state list;
    # domain_id distinguishes them to the DVFS handler.
    test_sys.cpu_clk_domain = SrcClockDomain(clock = haswell_pstates,
                                             voltage_domain =
                                             test_sys.cpu_voltage_domain,
                                             domain_id = 0)

    test_sys.cpu_clk_domain1 = SrcClockDomain(clock = haswell_pstates,
                                             voltage_domain =
                                             test_sys.cpu_voltage_domain,
                                             domain_id = 1)

    test_sys.cpu_clk_domain2 = SrcClockDomain(clock = haswell_pstates,
                                             voltage_domain =
                                             test_sys.cpu_voltage_domain,
                                             domain_id = 2)

    test_sys.cpu_clk_domain3 = SrcClockDomain(clock = haswell_pstates,
                                             voltage_domain =
                                             test_sys.cpu_voltage_domain,
                                             domain_id = 3)

    # Register all four per-core domains with the DVFS handler so their
    # frequency can be switched at runtime; transitions take 40us.
    test_sys.dvfs_handler.transition_latency = '40us'
    test_sys.dvfs_handler.domains =  [test_sys.cpu_clk_domain, test_sys.cpu_clk_domain1, test_sys.cpu_clk_domain2, test_sys.cpu_clk_domain3]
    test_sys.dvfs_handler.enable = 1

    if options.kernel is not None:
        test_sys.kernel = binary(options.kernel)

    if options.script is not None:
        test_sys.readfile = options.script

    if options.lpae:
        test_sys.have_lpae = True

    if options.virtualisation:
        test_sys.have_virtualization = True

    test_sys.init_param = options.init_param

    # For now, assign all the CPUs to the same clock domain
    # NOTE(review): hard-coded to four CPUs (ignores np), each on its own
    # clock domain and socket.
    test_sys.cpu = [TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=0, socket_id=0), TestCPUClass(clk_domain=test_sys.cpu_clk_domain1, cpu_id=1, socket_id=1), TestCPUClass(clk_domain=test_sys.cpu_clk_domain2, cpu_id=2, socket_id=2), TestCPUClass(clk_domain=test_sys.cpu_clk_domain3, cpu_id=3, socket_id=3)]

    # KVM-based CPU models require a VM object on the system.
    if is_kvm_cpu(TestCPUClass) or is_kvm_cpu(FutureClass):
        test_sys.vm = KvmVM()

    if options.ruby:
        # Check for timing mode because ruby does not support atomic accesses
        if not (options.cpu_type == "detailed" or options.cpu_type == "timing"):
            print >> sys.stderr, "Ruby requires TimingSimpleCPU or O3CPU!!"
            sys.exit(1)

        Ruby.create_system(options, True, test_sys, test_sys.iobus,
                           test_sys._dma_ports)

        # Create a seperate clock domain for Ruby
        test_sys.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
                                        voltage_domain = test_sys.voltage_domain)

        # Connect the ruby io port to the PIO bus,
        # assuming that there is just one such port.
        test_sys.iobus.master = test_sys.ruby._io_port.slave

        for (i, cpu) in enumerate(test_sys.cpu):
            #
            # Tie the cpu ports to the correct ruby system ports
            #
            # NOTE(review): this reassigns every CPU to domain 0,
            # overwriting the per-core clock domains set at construction
            # -- confirm this is intended for the Ruby path.
            cpu.clk_domain = test_sys.cpu_clk_domain
            cpu.createThreads()
            cpu.createInterruptController()

            cpu.icache_port = test_sys.ruby._cpu_ports[i].slave
            cpu.dcache_port = test_sys.ruby._cpu_ports[i].slave

            # x86 additionally routes the page-table walkers and the
            # interrupt controller through the Ruby ports.
            if buildEnv['TARGET_ISA'] == "x86":
                cpu.itb.walker.port = test_sys.ruby._cpu_ports[i].slave
                cpu.dtb.walker.port = test_sys.ruby._cpu_ports[i].slave

                # interrupts is used here without indexing -- presumably
                # an older gem5 interrupt-controller API; verify against
                # the gem5 version in use.
                cpu.interrupts.pio = test_sys.ruby._cpu_ports[i].master
                cpu.interrupts.int_master = test_sys.ruby._cpu_ports[i].slave
                cpu.interrupts.int_slave = test_sys.ruby._cpu_ports[i].master

    else:
        if options.caches or options.l2cache:
            # By default the IOCache runs at the system clock
            test_sys.iocache = IOCache(addr_ranges = test_sys.mem_ranges)
            test_sys.iocache.cpu_side = test_sys.iobus.master
            test_sys.iocache.mem_side = test_sys.membus.slave
        else:
            # Without caches, a plain bridge lets IO/DMA traffic reach
            # the memory bus.
            test_sys.iobridge = Bridge(delay='50ns', ranges = test_sys.mem_ranges)
            test_sys.iobridge.slave = test_sys.iobus.master
            test_sys.iobridge.master = test_sys.membus.slave

        # Sanity check
        if options.fastmem:
            if TestCPUClass != AtomicSimpleCPU:
                fatal("Fastmem can only be used with atomic CPU!")
            if (options.caches or options.l2cache):
                fatal("You cannot use fastmem in combination with caches!")

        if options.simpoint_profile:
            if not options.fastmem:
                # Atomic CPU checked with fastmem option already
                fatal("SimPoint generation should be done with atomic cpu and fastmem")
            if np > 1:
                fatal("SimPoint generation not supported with more than one CPUs")

        for i in xrange(np):
            if options.fastmem:
                test_sys.cpu[i].fastmem = True
            if options.simpoint_profile:
                test_sys.cpu[i].addSimPointProbe(options.simpoint_interval)
            if options.checker:
                test_sys.cpu[i].addCheckerCpu()
            test_sys.cpu[i].createThreads()

        CacheConfig.config_cache(options, test_sys)
        MemConfig.config_mem(options, test_sys)

    return test_sys
Beispiel #22
0
def setMemClass(options):
    """Look up and return the memory controller class selected by
    ``options.mem_type``."""
    selected_type = options.mem_type
    return MemConfig.get(selected_type)
Beispiel #23
0
def build_test_system(np):
    """Build and return the full-system (FS) simulation object for np CPUs.

    Relies on module-level state set up by the enclosing script before this
    call: options, bm, test_mem_mode, TestCPUClass, FutureClass,
    TestMemClass, buildEnv, and the make*System helpers -- TODO confirm
    against the caller, none of them are defined in this function.

    np -- number of CPU objects to instantiate.
    """
    # Instantiate the ISA-specific system; each make*System helper builds
    # the platform, buses and boot setup for that target ISA.
    if buildEnv['TARGET_ISA'] == "alpha":
        test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0], options.ruby)
    elif buildEnv['TARGET_ISA'] == "mips":
        test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0])
    elif buildEnv['TARGET_ISA'] == "sparc":
        test_sys = makeSparcSystem(test_mem_mode, bm[0])
    elif buildEnv['TARGET_ISA'] == "x86":
        test_sys = makeLinuxX86System(test_mem_mode, options.num_cpus, bm[0],
                options.ruby)
    elif buildEnv['TARGET_ISA'] == "arm":
        test_sys = makeArmSystem(test_mem_mode, options.machine_type,
                                 options.num_cpus, bm[0], options.dtb_filename,
                                 bare_metal=options.bare_metal)
        if options.enable_context_switch_stats_dump:
            test_sys.enable_context_switch_stats_dump = True
    else:
        fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA'])

    # Set the cache line size for the entire system
    test_sys.cache_line_size = options.cacheline_size

    # Create a top-level voltage domain
    test_sys.voltage_domain = VoltageDomain(voltage = options.sys_voltage)

    # Create a source clock for the system and set the clock period
    test_sys.clk_domain = SrcClockDomain(clock =  options.sys_clock,
            voltage_domain = test_sys.voltage_domain)

    # Create a CPU voltage domain (separate from the system domain so CPU
    # clocks can be scaled independently)
    test_sys.cpu_voltage_domain = VoltageDomain()

    # Create a source clock for the CPUs and set the clock period
    test_sys.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock,
                                             voltage_domain =
                                             test_sys.cpu_voltage_domain)

    if options.kernel is not None:
        test_sys.kernel = binary(options.kernel)

    if options.script is not None:
        test_sys.readfile = options.script

    if options.lpae:
        test_sys.have_lpae = True

    if options.virtualisation:
        test_sys.have_virtualization = True

    test_sys.init_param = options.init_param

    # For now, assign all the CPUs to the same clock domain
    test_sys.cpu = [TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i)
                    for i in xrange(np)]

    # KVM-based CPUs need a virtual machine object attached to the system.
    if is_kvm_cpu(TestCPUClass) or is_kvm_cpu(FutureClass):
        test_sys.vm = KvmVM()

    if options.ruby:
        # Check for timing mode because ruby does not support atomic accesses
        if not (options.cpu_type == "detailed" or options.cpu_type == "timing"):
            print >> sys.stderr, "Ruby requires TimingSimpleCPU or O3CPU!!"
            sys.exit(1)

        Ruby.create_system(options, test_sys, test_sys.iobus, test_sys._dma_ports)

        # Create a separate clock domain for Ruby
        test_sys.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
                                        voltage_domain = test_sys.voltage_domain)

        for (i, cpu) in enumerate(test_sys.cpu):
            #
            # Tie the cpu ports to the correct ruby system ports
            #
            cpu.clk_domain = test_sys.cpu_clk_domain
            cpu.createThreads()
            cpu.createInterruptController()

            cpu.icache_port = test_sys.ruby._cpu_ports[i].slave
            cpu.dcache_port = test_sys.ruby._cpu_ports[i].slave

            if buildEnv['TARGET_ISA'] == "x86":
                # x86 page-table walkers and the interrupt controller also
                # go through the Ruby sequencer ports.
                cpu.itb.walker.port = test_sys.ruby._cpu_ports[i].slave
                cpu.dtb.walker.port = test_sys.ruby._cpu_ports[i].slave

                cpu.interrupts.pio = test_sys.ruby._cpu_ports[i].master
                cpu.interrupts.int_master = test_sys.ruby._cpu_ports[i].slave
                cpu.interrupts.int_slave = test_sys.ruby._cpu_ports[i].master

            test_sys.ruby._cpu_ports[i].access_phys_mem = True

        # Create the appropriate memory controllers
        # and connect them to the IO bus
        test_sys.mem_ctrls = [TestMemClass(range = r) for r in test_sys.mem_ranges]
        for i in xrange(len(test_sys.mem_ctrls)):
            test_sys.mem_ctrls[i].port = test_sys.iobus.master

    else:
        # Classic memory system: bridge device (IO bus) traffic into the
        # memory bus, through an IOCache when CPU caches are present.
        if options.caches or options.l2cache:
            # By default the IOCache runs at the system clock
            test_sys.iocache = IOCache(addr_ranges = test_sys.mem_ranges)
            test_sys.iocache.cpu_side = test_sys.iobus.master
            test_sys.iocache.mem_side = test_sys.membus.slave
        else:
            test_sys.iobridge = Bridge(delay='50ns', ranges = test_sys.mem_ranges)
            test_sys.iobridge.slave = test_sys.iobus.master
            test_sys.iobridge.master = test_sys.membus.slave

        # Sanity check: fastmem bypasses the memory system, so it is only
        # valid for atomic CPUs without caches.
        if options.fastmem:
            if TestCPUClass != AtomicSimpleCPU:
                fatal("Fastmem can only be used with atomic CPU!")
            if (options.caches or options.l2cache):
                fatal("You cannot use fastmem in combination with caches!")

        for i in xrange(np):
            if options.fastmem:
                test_sys.cpu[i].fastmem = True
            if options.checker:
                test_sys.cpu[i].addCheckerCpu()
            test_sys.cpu[i].createThreads()

        BaseCacheConfig.config_cache(options, test_sys)
        MemConfig.config_mem(options, test_sys)

    return test_sys
Beispiel #24
0
def addCommonOptions(parser):
    """Register the command-line options shared by the simulation scripts.

    parser -- an optparse.OptionParser; options are added in place via
    parser.add_option, grouped by topic (system, memory, cache, core,
    branch predictor, dist-gem5, run duration, checkpointing, ...).
    """
    # system options
    parser.add_option("--list-cpu-types",
                      action="callback", callback=_listCpuTypes,
                      help="List available CPU types")
    parser.add_option("--cpu-type", type="choice", default="atomic",
                      choices=CpuConfig.cpu_names(),
                      help = "type of cpu to run with")
    parser.add_option("--checker", action="store_true")
    parser.add_option("-n", "--num-cpus", type="int", default=1)
    parser.add_option("--sys-voltage", action="store", type="string",
                      default='1.0V',
                      help = """Top-level voltage for blocks running at system
                      power supply""")
    parser.add_option("--sys-clock", action="store", type="string",
                      default='1GHz',
                      help = """Top-level clock for blocks running at system
                      speed""")
    parser.add_option("--cpu-clock", action="store", type="string",
                      default='2GHz',
                      help="Clock for blocks running at CPU speed")
    parser.add_option("--smt", action="store_true", default=False,
                      help = """
                      Only used if multiple programs are specified. If true,
                      then the number of threads per cpu is same as the
                      number of programs.""")
    parser.add_option("--elastic-trace-en", action="store_true",
                      help="""Enable capture of data dependency and instruction
                      fetch traces using elastic trace probe.""")
    # Trace file paths input to trace probe in a capture simulation and input
    # to Trace CPU in a replay simulation
    parser.add_option("--inst-trace-file", action="store", type="string",
                      help="""Instruction fetch trace file input to
                      Elastic Trace probe in a capture simulation and
                      Trace CPU in a replay simulation""", default="")
    parser.add_option("--data-trace-file", action="store", type="string",
                      help="""Data dependency trace file input to
                      Elastic Trace probe in a capture simulation and
                      Trace CPU in a replay simulation""", default="")

    # Silent TCP ports for non-interactive simulations
    parser.add_option("--disable-ports", action="store_true", default=False,
                       help="Disable gdb/m5term ports. Useful to run multiple "\
                       "batch simulations per compute-node")

    # Memory Options
    parser.add_option("--list-mem-types",
                      action="callback", callback=_listMemTypes,
                      help="List available memory types")
    parser.add_option("--mem-type", type="choice", default="DDR3_1600_x64",
                      choices=MemConfig.mem_names(),
                      help = "type of memory to use")
    parser.add_option("--mem-channels", type="int", default=1,
                      help = "number of memory channels")
    parser.add_option("--mem-ranks", type="int", default=None,
                      help = "number of memory ranks per channel")
    parser.add_option("--mem-size", action="store", type="string",
                      default="512MB",
                      help="Specify the physical memory size (single memory)")

    parser.add_option("-l", "--lpae", action="store_true")
    parser.add_option("-V", "--virtualisation", action="store_true")

    parser.add_option("--memchecker", action="store_true")

    # Cache Options
    parser.add_option("--external-memory-system", type="string",
                      help="use external ports of this port_type for caches")
    parser.add_option("--tlm-memory", type="string",
                      help="use external port for SystemC TLM cosimulation")
    parser.add_option("--caches", action="store_true")
    parser.add_option("--l2cache", action="store_true")
    parser.add_option("--l3cache", action="store_true")
    parser.add_option("--fastmem", action="store_true")
    parser.add_option("--num-dirs", type="int", default=1)
    parser.add_option("--num-l2caches", type="int", default=1)
    parser.add_option("--num-l3caches", type="int", default=1)
    parser.add_option("--l1d_size", type="string", default="64kB")
    parser.add_option("--l1d_assoc", type="int", default="2")
    parser.add_option("--l1d_hit_latency", type="int", default="2")
    parser.add_option("--l1d_response_latency", type="int", default="2")
    parser.add_option("--l1d_mshrs", type="int", default="4")
    parser.add_option("--l1d_tgts_per_mshr", type="int", default="20")
    parser.add_option("--l1i_size", type="string", default="32kB")
    parser.add_option("--l1i_assoc", type="int", default="2")
    parser.add_option("--l1i_hit_latency", type="int", default="2")
    parser.add_option("--l1i_response_latency", type="int", default="2")
    parser.add_option("--l1i_mshrs", type="int", default="4")
    parser.add_option("--l1i_tgts_per_mshr", type="int", default="20")
    parser.add_option("--l2_size", type="string", default="512kB")
    parser.add_option("--l2_assoc", type="int", default="8")
    parser.add_option("--l2_hit_latency", type="int", default="20")
    parser.add_option("--l2_response_latency", type="int", default="20")
    parser.add_option("--l2_mshrs", type="int", default="20")
    parser.add_option("--l2_tgts_per_mshr", type="int", default="12")
    parser.add_option("--l3_size", type="string", default="8MB")
    parser.add_option("--l3_assoc", type="int", default="16")
    parser.add_option("--l3_hit_latency", type="int", default="50")
    parser.add_option("--l3_response_latency", type="int", default="50")
    parser.add_option("--l3_mshrs", type="int", default="20")
    parser.add_option("--l3_tgts_per_mshr", type="int", default="16")
    parser.add_option("--cacheline_size", type="int", default=64)
    parser.add_option("--l1_replacement", type="string", default="lru")
    parser.add_option("--l1_prefetcher", type="string", default="none")
    parser.add_option("--l2_replacement", type="string", default="lru")
    parser.add_option("--l2_prefetcher", type="string", default="none")
    parser.add_option("--l3_replacement", type="string", default="lru")
    parser.add_option("--l3_prefetcher", type="string", default="none")

    # Core (O3 pipeline) options
    parser.add_option("--cachePorts", type="int", default=200)
    parser.add_option("--decodeToFetchDelay", type="int", default=1)
    parser.add_option("--renameToFetchDelay", type="int", default=1)
    parser.add_option("--iewToFetchDelay", type="int", default=1)
    parser.add_option("--commitToFetchDelay", type="int", default=1)
    parser.add_option("--fetchWidth", type="int", default=4)

    parser.add_option("--renameToDecodeDelay", type="int", default=1)
    parser.add_option("--iewToDecodeDelay", type="int", default=1, help="Issue/Execute/Writeback to decode delay")
    parser.add_option("--commitToDecodeDelay", type="int", default=1)
    parser.add_option("--fetchToDecodeDelay", type="int", default=1)
    parser.add_option("--decodeWidth", type="int", default=4)

    parser.add_option("--iewToRenameDelay", type="int", default=1, help="Issue/Execute/Writeback to rename delay")
    parser.add_option("--commitToRenameDelay", type="int", default=1)
    parser.add_option("--decodeToRenameDelay", type="int", default=1)
    parser.add_option("--renameWidth", type="int", default=4)

    parser.add_option("--commitToIEWDelay", type="int", default=1,help="Commit to Issue/Execute/Writeback delay")
    parser.add_option("--renameToIEWDelay", type="int", default=2,help="Rename to Issue/Execute/Writeback delay")
    parser.add_option("--issueToExecuteDelay", type="int", default=1, help="Issue to execute delay (internal to the IEW stage)")
    parser.add_option("--dispatchWidth", type="int", default=4)
    parser.add_option("--issueWidth", type="int", default=4)
    parser.add_option("--wbWidth", type="int", default=4)

    parser.add_option("--iewToCommitDelay", type="int", default=1, help="Issue/Execute/Writeback to commit delay")
    parser.add_option("--renameToROBDelay", type="int", default=1, help="Rename to reorder buffer delay")
    parser.add_option("--commitWidth", type="int", default=4)
    parser.add_option("--squashWidth", type="int", default=4)
    parser.add_option("--trapLatency", type="int", default=4)
    parser.add_option("--fetchTrapLatency", type="int", default=1)

    parser.add_option("--backComSize", type="int", default=5, help="Time buffer size for backwards communication")
    parser.add_option("--forwardComSize", type="int", default=5, help="Time buffer size for forward communication")

    parser.add_option("--LQEntries", type="int", default=32)
    parser.add_option("--SQEntries", type="int", default=32)
    parser.add_option("--LSQDepCheckShift", type="int", default=4, help="Number of places to shift addr before check")
    parser.add_option("--LSQCheckLoads", type="string", default="True", help="Should dependency violations be checked for loads & stores or just stores")
    parser.add_option("--store_set_clear_period", type="int", default=250000,
        help="Number of load/store insts before the dep predictor should be invalidated")
    parser.add_option("--LFSTSize", type="int", default=1024, help="Last fetched store table size")
    parser.add_option("--SSITSize", type="int", default=1024, help="Store set ID table size")

    parser.add_option("--numRobs", type="int", default=1)
    parser.add_option("--numPhysIntRegs", type="int", default=256)
    parser.add_option("--numPhysFloatRegs", type="int", default=256)
    parser.add_option("--numIQEntries", type="int", default=64)
    parser.add_option("--numROBEntries", type="int", default=192)

    # NOTE(review): the "ldssqcount" defaults below do not appear among the
    # choices listed in their own help strings (dynamic/partitioned/threshold,
    # aggressive/roundrobin/oldestready) -- confirm against the consumers of
    # these options before changing them.
    parser.add_option("--smtNumFetchingThreads", type="int", default=1)
    parser.add_option("--smtFetchPolicy", type="string", default="singlethread",help="SMT Fetch policy. Values (not case-sensitive): singlethread, roundrobin, branch, iqcount or lsqcount")
    parser.add_option("--smtLSQPolicy", type="string", default="ldssqcount",help="SMT LSQ Sharing Policy. Values (not case-sensitive): dynamic, partitioned or threshold")
    parser.add_option("--smtLSQThreshold", type="int", default=100)
    parser.add_option("--smtIQPolicy", type="string", default="ldssqcount",help="SMT IQ Sharing Policy. Values (not case-sensitive): dynamic, partitioned or threshold")
    parser.add_option("--smtIQThreshold", type="int", default=100)
    parser.add_option("--smtROBPolicy", type="string", default="ldssqcount",help="SMT ROB Sharing Policy. Values (not case-sensitive): dynamic, partitioned or threshold")
    parser.add_option("--smtROBThreshold", type="int", default=100)
    parser.add_option("--smtCommitPolicy", type="string", default="ldssqcount",help="SMT Commit Policy. Values (not case-sensitive): aggressive, roundrobin or oldestready")

    # Branch predictor options
    parser.add_option("--BTBEntries", type="int", default=4096)
    parser.add_option("--BTBTagSize", type="int", default=16)
    parser.add_option("--RASSize", type="int", default=16)
    parser.add_option("--instShiftAmt", type="int", default=2)

    # dist-gem5 options
    parser.add_option("--dist", action="store_true",
                      help="Parallel distributed gem5 simulation.")
    parser.add_option("--is-switch", action="store_true",
                      help="Select the network switch simulator process for a"\
                      "distributed gem5 run")
    parser.add_option("--dist-rank", default=0, action="store", type="int",
                      help="Rank of this system within the dist gem5 run.")
    parser.add_option("--dist-size", default=0, action="store", type="int",
                      help="Number of gem5 processes within the dist gem5 run.")
    parser.add_option("--dist-server-name",
                      default="127.0.0.1",
                      action="store", type="string",
                      help="Name of the message server host\nDEFAULT: localhost")
    parser.add_option("--dist-server-port",
                      default=2200,
                      action="store", type="int",
                      help="Message server listen port\nDEFAULT: 2200")
    parser.add_option("--dist-sync-repeat",
                      default="0us",
                      action="store", type="string",
                      help="Repeat interval for synchronisation barriers among dist-gem5 processes\nDEFAULT: --ethernet-linkdelay")
    parser.add_option("--dist-sync-start",
                      default="5200000000000t",
                      action="store", type="string",
                      help="Time to schedule the first dist synchronisation barrier\nDEFAULT:5200000000000t")
    parser.add_option("--ethernet-linkspeed", default="10Gbps",
                        action="store", type="string",
                        help="Link speed in bps\nDEFAULT: 10Gbps")
    parser.add_option("--ethernet-linkdelay", default="10us",
                      action="store", type="string",
                      help="Link delay in seconds\nDEFAULT: 10us")

    # Enable Ruby
    parser.add_option("--ruby", action="store_true")

    # Run duration options
    parser.add_option("-m", "--abs-max-tick", type="int", default=m5.MaxTick,
                      metavar="TICKS", help="Run to absolute simulated tick " \
                      "specified including ticks from a restored checkpoint")
    parser.add_option("--rel-max-tick", type="int", default=None,
                      metavar="TICKS", help="Simulate for specified number of" \
                      " ticks relative to the simulation start tick (e.g. if " \
                      "restoring a checkpoint)")
    parser.add_option("--maxtime", type="float", default=None,
                      help="Run to the specified absolute simulated time in " \
                      "seconds")
    parser.add_option("-I", "--maxinsts", action="store", type="int",
                      default=None, help="""Total number of instructions to
                                            simulate (default: run forever)""")
    parser.add_option("--work-item-id", action="store", type="int",
                      help="the specific work id for exit & checkpointing")
    parser.add_option("--num-work-ids", action="store", type="int",
                      help="Number of distinct work item types")
    parser.add_option("--work-begin-cpu-id-exit", action="store", type="int",
                      help="exit when work starts on the specified cpu")
    parser.add_option("--work-end-exit-count", action="store", type="int",
                      help="exit at specified work end count")
    parser.add_option("--work-begin-exit-count", action="store", type="int",
                      help="exit at specified work begin count")
    parser.add_option("--init-param", action="store", type="int", default=0,
                      help="""Parameter available in simulation with m5
                              initparam""")
    parser.add_option("--initialize-only", action="store_true", default=False,
                      help="""Exit after initialization. Do not simulate time.
                              Useful when gem5 is run as a library.""")

    # Simpoint options
    parser.add_option("--simpoint-profile", action="store_true",
                      help="Enable basic block profiling for SimPoints")
    parser.add_option("--simpoint-interval", type="int", default=10000000,
                      help="SimPoint interval in num of instructions")
    parser.add_option("--take-simpoint-checkpoints", action="store", type="string",
        help="<simpoint file,weight file,interval-length,warmup-length>")
    parser.add_option("--restore-simpoint-checkpoint", action="store_true",
        help="restore from a simpoint checkpoint taken with " +
             "--take-simpoint-checkpoints")

    # Checkpointing options
    ###Note that performing checkpointing via python script files will override
    ###checkpoint instructions built into binaries.
    parser.add_option("--take-checkpoints", action="store", type="string",
        help="<M,N> take checkpoints at tick M and every N ticks thereafter")
    parser.add_option("--max-checkpoints", action="store", type="int",
        help="the maximum number of checkpoints to drop", default=5)
    parser.add_option("--checkpoint-dir", action="store", type="string",
        help="Place all checkpoints in this absolute directory")
    parser.add_option("-r", "--checkpoint-restore", action="store", type="int",
        help="restore from checkpoint <N>")
    parser.add_option("--checkpoint-at-end", action="store_true",
                      help="take a checkpoint at end of run")
    parser.add_option("--work-begin-checkpoint-count", action="store", type="int",
                      help="checkpoint at specified work begin count")
    parser.add_option("--work-end-checkpoint-count", action="store", type="int",
                      help="checkpoint at specified work end count")
    parser.add_option("--work-cpus-checkpoint-count", action="store", type="int",
                      help="checkpoint and exit when active cpu count is reached")
    parser.add_option("--restore-with-cpu", action="store", type="choice",
                      default="timing", choices=CpuConfig.cpu_names(),
                      help = "cpu type for restoring from a checkpoint")


    # CPU Switching - default switch model goes from a checkpoint
    # to a timing simple CPU with caches to warm up, then to detailed CPU for
    # data measurement
    parser.add_option("--repeat-switch", action="store", type="int",
        default=None,
        help="switch back and forth between CPUs with period <N>")
    parser.add_option("-s", "--standard-switch", action="store", type="int",
        default=None,
        help="switch from timing to Detailed CPU after warmup period of <N>")
    parser.add_option("-p", "--prog-interval", type="str",
        help="CPU Progress Interval")

    # Fastforwarding and simpoint related materials
    parser.add_option("-W", "--warmup-insts", action="store", type="int",
        default=None,
        help="Warmup period in total instructions (requires --standard-switch)")
    parser.add_option("--bench", action="store", type="string", default=None,
        help="base names for --take-checkpoint and --checkpoint-restore")
    parser.add_option("-F", "--fast-forward", action="store", type="string",
        default=None,
        help="Number of instructions to fast forward before switching")
    parser.add_option("-S", "--simpoint", action="store_true", default=False,
        help="""Use workload simpoints as an instruction offset for
                --checkpoint-restore or --take-checkpoint.""")
    parser.add_option("--at-instruction", action="store_true", default=False,
        help="""Treat value of --checkpoint-restore or --take-checkpoint as a
                number of instructions.""")
    parser.add_option("--spec-input", default="ref", type="choice",
                      choices=["ref", "test", "train", "smred", "mdred",
                               "lgred"],
                      help="Input set size for SPEC CPU2000 benchmarks.")
    parser.add_option("--arm-iset", default="arm", type="choice",
                      choices=["arm", "thumb", "aarch64"],
                      help="ARM instruction set.")
    #begin ATC CODE (prietop)
    #Added the option to modify the random seed of the execution.
    parser.add_option("--random_seed", action="store", type="int",
                    default=None, help="Used for seeding the random number generator")
def addCommonOptions(parser):
    # system options
    parser.add_option("--list-cpu-types",
                      action="callback", callback=_listCpuTypes,
                      help="List available CPU types")
    parser.add_option("--cpu-type", type="choice", default="atomic",
                      choices=CpuConfig.cpu_names(),
                      help = "type of cpu to run with")
    parser.add_option("--checker", action="store_true");
    parser.add_option("-n", "--num-cpus", type="int", default=1)
    parser.add_option("--sys-voltage", action="store", type="string",
                      default='1.0V',
                      help = """Top-level voltage for blocks running at system
                      power supply""")
    parser.add_option("--sys-clock", action="store", type="string",
                      default='1GHz',
                      help = """Top-level clock for blocks running at system
                      speed""")
    parser.add_option("--cpu-clock", action="store", type="string",
                      default='2GHz',
                      help="Clock for blocks running at CPU speed")
    parser.add_option("--smt", action="store_true", default=False,
                      help = """
                      Only used if multiple programs are specified. If true,
                      then the number of threads per cpu is same as the
                      number of programs.""")

    # Memory Options
    parser.add_option("--list-mem-types",
                      action="callback", callback=_listMemTypes,
                      help="List available memory types")
    parser.add_option("--mem-type", type="choice", default="ddr3_1600_x64",
                      choices=MemConfig.mem_names(),
                      help = "type of memory to use")
    parser.add_option("--mem-channels", type="int", default=1,
                      help = "number of memory channels")
    parser.add_option("--mem-size", action="store", type="string",
                      default="512MB",
                      help="Specify the physical memory size (single memory)")

    parser.add_option("-l", "--lpae", action="store_true")
    parser.add_option("-V", "--virtualisation", action="store_true")

    # Cache Options
    parser.add_option("--caches", action="store_true")
    parser.add_option("--l2cache", action="store_true")
    parser.add_option("--fastmem", action="store_true")
    parser.add_option("--num-dirs", type="int", default=1)
    parser.add_option("--num-l2caches", type="int", default=1)
    parser.add_option("--num-l3caches", type="int", default=1)
    parser.add_option("--l1d_size", type="string", default="64kB")
    parser.add_option("--l1i_size", type="string", default="32kB")
    parser.add_option("--l2_size", type="string", default="2MB")
    parser.add_option("--l3_size", type="string", default="16MB")
    parser.add_option("--l1d_assoc", type="int", default=2)
    parser.add_option("--l1i_assoc", type="int", default=2)
    parser.add_option("--l2_assoc", type="int", default=8)
    parser.add_option("--l3_assoc", type="int", default=16)
    parser.add_option("--cacheline_size", type="int", default=64)

    # Enable Ruby
    parser.add_option("--ruby", action="store_true")

    # Run duration options
    parser.add_option("-m", "--abs-max-tick", type="int", default=m5.MaxTick,
                      metavar="TICKS", help="Run to absolute simulated tick " \
                      "specified including ticks from a restored checkpoint")
    parser.add_option("--rel-max-tick", type="int", default=None,
                      metavar="TICKS", help="Simulate for specified number of" \
                      " ticks relative to the simulation start tick (e.g. if " \
                      "restoring a checkpoint)")
    parser.add_option("--maxtime", type="float", default=None,
                      help="Run to the specified absolute simulated time in " \
                      "seconds")
    parser.add_option("-I", "--maxinsts", action="store", type="int",
                      default=None, help="""Total number of instructions to
                                            simulate (default: run forever)""")
    parser.add_option("--work-item-id", action="store", type="int",
                      help="the specific work id for exit & checkpointing")
    parser.add_option("--num-work-ids", action="store", type="int",
                      help="Number of distinct work item types")
    parser.add_option("--work-begin-cpu-id-exit", action="store", type="int",
                      help="exit when work starts on the specified cpu")
    parser.add_option("--work-end-exit-count", action="store", type="int",
                      help="exit at specified work end count")
    parser.add_option("--work-begin-exit-count", action="store", type="int",
                      help="exit at specified work begin count")
    parser.add_option("--init-param", action="store", type="int", default=0,
                      help="""Parameter available in simulation with m5
                              initparam""")

    # Simpoint options
    parser.add_option("--simpoint-profile", action="store_true",
                      help="Enable basic block profiling for SimPoints")
    parser.add_option("--simpoint-interval", type="int", default=10000000,
                      help="SimPoint interval in num of instructions")

    # Checkpointing options
    ###Note that performing checkpointing via python script files will override
    ###checkpoint instructions built into binaries.
    parser.add_option("--take-checkpoints", action="store", type="string",
        help="<M,N> take checkpoints at tick M and every N ticks thereafter")
    parser.add_option("--max-checkpoints", action="store", type="int",
        help="the maximum number of checkpoints to drop", default=5)
    parser.add_option("--checkpoint-dir", action="store", type="string",
        help="Place all checkpoints in this absolute directory")
    parser.add_option("-r", "--checkpoint-restore", action="store", type="int",
        help="restore from checkpoint <N>")
    parser.add_option("--checkpoint-at-end", action="store_true",
                      help="take a checkpoint at end of run")
    parser.add_option("--work-begin-checkpoint-count", action="store", type="int",
                      help="checkpoint at specified work begin count")
    parser.add_option("--work-end-checkpoint-count", action="store", type="int",
                      help="checkpoint at specified work end count")
    parser.add_option("--work-cpus-checkpoint-count", action="store", type="int",
                      help="checkpoint and exit when active cpu count is reached")
    parser.add_option("--restore-with-cpu", action="store", type="choice",
                      default="atomic", choices=CpuConfig.cpu_names(),
                      help = "cpu type for restoring from a checkpoint")


    # CPU Switching - default switch model goes from a checkpoint
    # to a timing simple CPU with caches to warm up, then to detailed CPU for
    # data measurement
    parser.add_option("--repeat-switch", action="store", type="int",
        default=None,
        help="switch back and forth between CPUs with period <N>")
    parser.add_option("-s", "--standard-switch", action="store", type="int",
        default=None,
        help="switch from timing to Detailed CPU after warmup period of <N>")
    parser.add_option("-p", "--prog-interval", type="str",
        help="CPU Progress Interval")

    # Fastforwarding and simpoint related materials
    parser.add_option("-W", "--warmup-insts", action="store", type="int",
        default=None,
        help="Warmup period in total instructions (requires --standard-switch)")
    parser.add_option("--bench", action="store", type="string", default=None,
        help="base names for --take-checkpoint and --checkpoint-restore")
    parser.add_option("-F", "--fast-forward", action="store", type="string",
        default=None,
        help="Number of instructions to fast forward before switching")
    parser.add_option("-S", "--simpoint", action="store_true", default=False,
        help="""Use workload simpoints as an instruction offset for
                --checkpoint-restore or --take-checkpoint.""")
    parser.add_option("--at-instruction", action="store_true", default=False,
        help="""Treat value of --checkpoint-restore or --take-checkpoint as a
                number of instructions.""")
    parser.add_option("--spec-input", default="ref", type="choice",
                      choices=["ref", "test", "train", "smred", "mdred",
                               "lgred"],
                      help="Input set size for SPEC CPU2000 benchmarks.")
    parser.add_option("--arm-iset", default="arm", type="choice",
                      choices=["arm", "thumb", "aarch64"],
                      help="ARM instruction set.")


    # GemDroid options
    parser.add_option("--gemdroid", action="store_true", help="To enable <<< GemDroid >>> functionalities.")
    parser.add_option("--num_cpu_traces", type="int", default=1, help="Number of CPU trace files.")
    parser.add_option("--governor", type="int", default=1, help="Voltage Frequency governor: 0 - Disable; 1 - OnDemand; 2 - Performance; 3 - PowerSaving; 4 - Building; 5 - Interactive; 6 - Powercap; 7 - Slack; 8 - SlackOptimal")
    parser.add_option("--governor_timing", type="int", default=1, help="When to do DVFS: 1 - 10ms Fixed; 2 - 1ms Fixed; 3 - Frame Boundaries; 4 - IP Frame Boundaries")
    parser.add_option("--ip_freq", type="int", default=500, help="IP Freq in MHz.")
    parser.add_option("--core_freq", type="int", default=1000, help="CPU Freq in MHz.")
    parser.add_option("--mem_freq", type="int", default=800, help="Mem Freq in MHz.")
    parser.add_option("--cpu_trace1", action="store", type="string", default="none", help="Path to the CPU trace file1.")
    parser.add_option("--cpu_trace2", action="store", type="string", default="none", help="Path to the CPU trace file2.")
    parser.add_option("--cpu_trace3", action="store", type="string", default="none", help="Path to the CPU trace file3.")
    parser.add_option("--cpu_trace4", action="store", type="string", default="none", help="Path to the CPU trace file4.")
    parser.add_option("--gpu_trace", action="store", type="string", default="none.txt", help="Path to the GPU trace file.")    
    parser.add_option("--perfect_memory", action="store_true", help="Enable perfect memory.")
    parser.add_option("--no_periodic_stats", action="store_true", help="Disable periodic stats from GemDroid code.")
    parser.add_option("--sweep_val1", type="float", default=1, help="Value to use for the current sweep variable1.")    
    parser.add_option("--sweep_val2", type="float", default=1, help="Value to use for the current sweep variable2.")    
    parser.add_option("--device_config", type="string", default="ini/LPDDR3_micron_32M_8B_x8_sg15.ini", help="Mem Device configuration.")
    parser.add_option("--system_config", type="string", default="gemdroid.ini", help="Mem System configuration.")
Beispiel #26
0
# Sanity check: fastmem bypasses the memory system, so it is only valid
# with the atomic CPU model and without any caches in the hierarchy.
if options.fastmem:
    if TestCPUClass != AtomicSimpleCPU:
        fatal("Fastmem can only be used with atomic CPU!")
    if (options.caches or options.l2cache):
        fatal("You cannot use fastmem in combination with caches!")

# Apply per-CPU options and spin up the hardware threads on the test system.
for i in xrange(np):
    if options.fastmem:
        test_sys.cpu[i].fastmem = True
    if options.checker:
        # Attach a checker CPU that verifies the main CPU's execution.
        test_sys.cpu[i].addCheckerCpu()
    test_sys.cpu[i].createThreads()

# Build the cache hierarchy and memory controllers from the options.
CacheConfig.config_cache(options, test_sys)
MemConfig.config_mem(options, test_sys)

# A two-entry benchmark list selects a dual-system setup; build the second
# ("drive") system for the ISA this binary was compiled for.
if len(bm) == 2:
    if buildEnv['TARGET_ISA'] == 'alpha':
        drive_sys = makeLinuxAlphaSystem(drive_mem_mode, bm[1])
    elif buildEnv['TARGET_ISA'] == 'mips':
        drive_sys = makeLinuxMipsSystem(drive_mem_mode, bm[1])
    elif buildEnv['TARGET_ISA'] == 'sparc':
        drive_sys = makeSparcSystem(drive_mem_mode, bm[1])
    elif buildEnv['TARGET_ISA'] == 'x86':
        drive_sys = makeX86System(drive_mem_mode, np, bm[1])
    elif buildEnv['TARGET_ISA'] == 'arm':
        drive_sys = makeArmSystem(drive_mem_mode, options.machine_type, bm[1])

    # Create a top-level voltage domain
    drive_sys.voltage_domain = VoltageDomain(voltage = options.sys_voltage)
Beispiel #27
0
def _listDramcacheTypes(option, opt, value, parser):
    """optparse callback: print the available DRAM-cache types and quit.

    The option/opt/value/parser arguments are supplied by optparse and
    are unused here.
    """
    MemConfig.print_dramcache_list()
    # Equivalent to sys.exit(0): raises SystemExit with a zero status.
    raise SystemExit(0)
def create_system(options, full_system, system, dma_ports, ruby_system):
    """Build a Ruby system by re-running the base protocol script with the
    'split' protocol renamed to its 'fusion' variant, then optionally adding
    GPU device directory/memory controllers and a GPU copy-engine L1
    controller.

    Returns (cpu_sequencers, dir_cntrl_nodes, topology) for the caller to
    finish wiring up.
    """

    if not buildEnv['GPGPU_SIM']:
        m5.util.panic("This script requires GPGPU-Sim integration to be built.")

    # Force the backing store so functional accesses can be serviced.
    options.access_backing_store = True

    # Run the original protocol script
    # (Python 2 exec statement: imports the protocol config module by name.)
    buildEnv['PROTOCOL'] = buildEnv['PROTOCOL'].replace('split', 'fusion')
    protocol = buildEnv['PROTOCOL']
    exec "import %s" % protocol
    try:
        (cpu_sequencers, dir_cntrl_nodes, topology) = \
            eval("%s.create_system(options, full_system, system, dma_ports, ruby_system)" % protocol)
    except:
        # Bare except only adds a diagnostic; the exception is re-raised.
        print "Error: could not create system for ruby protocol inside fusion system %s" % protocol
        raise

    # Faking things to build the rest of the system
    print "Warning!"
    print "Warning: Faking split MOESI_hammer protocol; collecting checkpoints?"
    print "Warning!"

    # Optionally create dedicated directory + memory controllers for the
    # GPU's physical memory, splitting it evenly across num_dev_dirs.
    if options.num_dev_dirs > 0:
        block_size_bits = int(math.log(options.cacheline_size, 2))
        gpu_phys_mem_size = system.gpu.gpu_memory_range.size()
        mem_module_size = gpu_phys_mem_size / options.num_dev_dirs

        #
        # determine size and index bits for probe filter
        # By default, the probe filter size is configured to be twice the
        # size of the L2 cache.
        #
        pf_size = MemorySize(options.sc_l2_size)
        pf_size.value = pf_size.value * 2
        dir_bits = int(math.log(options.num_dev_dirs, 2))
        pf_bits = int(math.log(pf_size.value, 2))
        if options.numa_high_bit:
            if options.pf_on or options.dir_on:
                # if numa high bit explicitly set, make sure it does not overlap
                # with the probe filter index
                assert(options.numa_high_bit - dir_bits > pf_bits)

            # set the probe filter start bit to just above the block offset
            pf_start_bit = block_size_bits
        else:
            if dir_bits > 0:
                pf_start_bit = dir_bits + block_size_bits - 1
            else:
                pf_start_bit = block_size_bits

        dev_dir_cntrls = []
        dev_mem_ctrls = []
        # Device directories are numbered after the existing CPU directories.
        num_cpu_dirs = len(dir_cntrl_nodes)
        for i in xrange(options.num_dev_dirs):
            #
            # Create the Ruby objects associated with the directory controller
            #

            dir_version = i + num_cpu_dirs

            dir_size = MemorySize('0B')
            dir_size.value = mem_module_size

            pf = ProbeFilter(size = pf_size, assoc = 4,
                             start_index_bit = pf_start_bit)

            dev_dir_cntrl = Directory_Controller(version = dir_version,
                                 directory = \
                                 RubyDirectoryMemory( \
                                            version = dir_version,
                                            size = dir_size,
                                            numa_high_bit = \
                                            options.numa_high_bit,
                                            device_directory = True),
                                 probeFilter = pf,
                                 probe_filter_enabled = options.pf_on,
                                 full_bit_dir_enabled = options.dir_on,
                                 ruby_system = ruby_system)

            if options.recycle_latency:
                dev_dir_cntrl.recycle_latency = options.recycle_latency

            # Attach the controller as a uniquely-named child of ruby_system.
            exec("ruby_system.dev_dir_cntrl%d = dev_dir_cntrl" % i)
            dev_dir_cntrls.append(dev_dir_cntrl)

            # Connect the directory controller to the network
            dev_dir_cntrl.forwardFromDir = ruby_system.network.slave
            dev_dir_cntrl.responseFromDir = ruby_system.network.slave
            dev_dir_cntrl.dmaResponseFromDir = ruby_system.network.slave

            dev_dir_cntrl.unblockToDir = ruby_system.network.master
            dev_dir_cntrl.responseToDir = ruby_system.network.master
            dev_dir_cntrl.requestToDir = ruby_system.network.master
            dev_dir_cntrl.dmaRequestToDir = ruby_system.network.master

            # One memory controller per device directory, all covering the
            # GPU memory range with interleaving across num_dev_dirs.
            dev_mem_ctrl = MemConfig.create_mem_ctrl(
                MemConfig.get(options.mem_type), system.gpu.gpu_memory_range,
                i, options.num_dev_dirs, int(math.log(options.num_dev_dirs, 2)),
                options.cacheline_size)
            dev_mem_ctrl.port = dev_dir_cntrl.memory
            dev_mem_ctrls.append(dev_mem_ctrl)

            topology.addController(dev_dir_cntrl)

        system.dev_mem_ctrls = dev_mem_ctrls

    #
    # Create controller for the copy engine to connect to in GPU cluster
    # Cache is unused by controller
    #
    block_size_bits = int(math.log(options.cacheline_size, 2))
    l1i_cache = L1Cache(size = "2kB", assoc = 2)
    l1d_cache = L1Cache(size = "2kB", assoc = 2)
    l2_cache = L2Cache(size = "2kB",
                        assoc = 2,
                        start_index_bit = block_size_bits)

    # Version is chosen past all CPU and shader-core controllers.
    l1_cntrl = L1Cache_Controller(version = options.num_cpus + options.num_sc,
                                      L1Icache = l1i_cache,
                                      L1Dcache = l1d_cache,
                                      L2cache = l2_cache,
                                      no_mig_atomic = not \
                                          options.allow_atomic_migration,
                                      send_evictions = (
                                          options.cpu_type == "detailed"),
                                      ruby_system = ruby_system)

    gpu_ce_seq = RubySequencer(version = options.num_cpus + options.num_sc,
                               icache = l1i_cache,
                               dcache = l1d_cache,
                               max_outstanding_requests = 64,
                               ruby_system = ruby_system,
                               connect_to_io = False)

    l1_cntrl.sequencer = gpu_ce_seq

    ruby_system.l1_cntrl_gpuce = l1_cntrl

    cpu_sequencers.append(gpu_ce_seq)
    topology.addController(l1_cntrl)

    # Connect the L1 controller and the network
    # Connect the buffers from the controller to network
    l1_cntrl.requestFromCache = ruby_system.network.slave
    l1_cntrl.responseFromCache = ruby_system.network.slave
    l1_cntrl.unblockFromCache = ruby_system.network.slave

    # Connect the buffers from the network to the controller
    l1_cntrl.forwardToCache = ruby_system.network.master
    l1_cntrl.responseToCache = ruby_system.network.master

    return (cpu_sequencers, dir_cntrl_nodes, topology)
def create_system(options, full_system, system, dma_devices, ruby_system):
    """Build a combined CPU/GPU Ruby memory system on top of VI_hammer.

    Creates GPU L1/L2 cache controllers, copy-engine DMA controllers for
    both the CPU and GPU sides, and (optionally) dedicated device
    directory/memory controllers for the GPU's physical memory, then
    assembles everything into a single topology cluster.

    Returns (all_sequencers, dir_cntrls, complete_cluster).
    """

    if not buildEnv['GPGPU_SIM']:
        m5.util.panic("This script requires GPGPU-Sim integration to be built.")

    # Run the protocol script to setup CPU cluster, directory and DMA
    (all_sequencers, dir_cntrls, dma_cntrls, cpu_cluster) = \
                                        VI_hammer.create_system(options,
                                                                full_system,
                                                                system,
                                                                dma_devices,
                                                                ruby_system)

    # If we're going to split the directories/memory controllers.
    # (cpu_cntrl_count is bookkeeping only; it is not consumed below.)
    if options.num_dev_dirs > 0:
        cpu_cntrl_count = len(cpu_cluster)
    else:
        cpu_cntrl_count = len(cpu_cluster) + len(dir_cntrls)

    #
    # Create controller for the copy engine to connect to in CPU cluster
    # Cache is unused by controller
    #
    cache = L1Cache(size = "4096B", assoc = 2)

    cpu_ce_seq = RubySequencer(version = options.num_cpus + options.num_sc,
                               icache = cache,
                               dcache = cache,
                               max_outstanding_requests = 64,
                               ruby_system = ruby_system,
                               connect_to_io = False)

    cpu_ce_cntrl = GPUCopyDMA_Controller(version = 0,
                                         sequencer = cpu_ce_seq,
                                         number_of_TBEs = 256,
                                         ruby_system = ruby_system)

    cpu_cntrl_count += 1

    cpu_ce_cntrl.responseFromDir = ruby_system.network.master
    cpu_ce_cntrl.reqToDirectory = ruby_system.network.slave

    #
    # Build GPU cluster
    #
    gpu_cluster = Cluster(intBW = 32, extBW = 32)
    gpu_cluster.disableConnectToParent()

    l2_bits = int(math.log(options.num_l2caches, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))
    # This represents the L1 to L2 interconnect latency
    # NOTE! This latency is in Ruby (cache) cycles, not SM cycles
    per_hop_interconnect_latency = 45 # ~15 GPU cycles
    num_dance_hall_hops = int(math.log(options.num_sc, 2))
    if num_dance_hall_hops == 0:
        num_dance_hall_hops = 1
    l1_to_l2_noc_latency = per_hop_interconnect_latency * num_dance_hall_hops

    #
    # Caches for GPU cores
    #
    for i in xrange(options.num_sc):
        #
        # First create the Ruby objects associated with the GPU cores
        #
        cache = L1Cache(size = options.sc_l1_size,
                            assoc = options.sc_l1_assoc,
                            replacement_policy = "LRU",
                            start_index_bit = block_size_bits,
                            dataArrayBanks = 4,
                            tagArrayBanks = 4,
                            dataAccessLatency = 4,
                            tagAccessLatency = 4,
                            resourceStalls = False)

        l1_cntrl = GPUL1Cache_Controller(version = i,
                                  cache = cache,
                                  l2_select_num_bits = l2_bits,
                                  num_l2 = options.num_l2caches,
                                  issue_latency = l1_to_l2_noc_latency,
                                  number_of_TBEs = options.gpu_l1_buf_depth,
                                  ruby_system = ruby_system)

        gpu_seq = RubySequencer(version = options.num_cpus + i,
                            icache = cache,
                            dcache = cache,
                            max_outstanding_requests = options.gpu_l1_buf_depth,
                            ruby_system = ruby_system,
                            deadlock_threshold = 2000000,
                            connect_to_io = False)

        l1_cntrl.sequencer = gpu_seq

        # Attach as a uniquely-named child of ruby_system.
        exec("ruby_system.l1_cntrl_sp%02d = l1_cntrl" % i)

        #
        # Add controllers and sequencers to the appropriate lists
        #
        all_sequencers.append(gpu_seq)
        gpu_cluster.add(l1_cntrl)

        # Connect the controller to the network
        l1_cntrl.requestFromL1Cache = ruby_system.network.slave
        l1_cntrl.responseToL1Cache = ruby_system.network.master

    l2_index_start = block_size_bits + l2_bits
    # Use L2 cache and interconnect latencies to calculate protocol latencies
    # NOTE! These latencies are in Ruby (cache) cycles, not SM cycles
    l2_cache_access_latency = 30 # ~10 GPU cycles
    l2_to_l1_noc_latency = per_hop_interconnect_latency * num_dance_hall_hops
    l2_to_mem_noc_latency = 125 # ~40 GPU cycles

    l2_clusters = []
    for i in xrange(options.num_l2caches):
        #
        # First create the Ruby objects associated with this cpu
        #
        l2_cache = L2Cache(size = options.sc_l2_size,
                           assoc = options.sc_l2_assoc,
                           start_index_bit = l2_index_start,
                           replacement_policy = "LRU",
                           dataArrayBanks = 4,
                           tagArrayBanks = 4,
                           dataAccessLatency = 4,
                           tagAccessLatency = 4,
                           resourceStalls = options.gpu_l2_resource_stalls)

        # NOTE(review): the original read "assoc = 2^16"; '^' is XOR in
        # Python and evaluates to 18.  2 ** 16 (effectively fully
        # associative for the 8MB buffer) is clearly what was intended.
        region_buffer = regionBuffer_Obj(size = "8MB",
                           assoc = 2 ** 16,
                           start_index_bit = l2_index_start,
                           replacement_policy = "LRU",
                           dataArrayBanks = 4,
                           tagArrayBanks = 4,
                           dataAccessLatency = 4,
                           tagAccessLatency = 4,
                           resourceStalls = options.gpu_l2_resource_stalls,
                           regionSize = options.region_size)

        l2_cntrl = GPUL2Cache_Controller(version = i,
                                L2cache = l2_cache,
                                regionBuffer = region_buffer,
                                l2_response_latency = l2_cache_access_latency +
                                                      l2_to_l1_noc_latency,
                                l2_request_latency = l2_to_mem_noc_latency,
                                cache_response_latency = l2_cache_access_latency,
                                ruby_system = ruby_system)

        exec("ruby_system.l2_cntrl%d = l2_cntrl" % i)
        l2_cluster = Cluster(intBW = 32, extBW = 32)
        l2_cluster.add(l2_cntrl)
        gpu_cluster.add(l2_cluster)
        l2_clusters.append(l2_cluster)

        # Connect the controller to the network
        l2_cntrl.responseToL1Cache = ruby_system.network.slave
        l2_cntrl.requestFromCache = ruby_system.network.slave
        l2_cntrl.responseFromCache = ruby_system.network.slave
        l2_cntrl.unblockFromCache = ruby_system.network.slave

        l2_cntrl.requestFromL1Cache = ruby_system.network.master
        l2_cntrl.forwardToCache = ruby_system.network.master
        l2_cntrl.responseToCache = ruby_system.network.master

    gpu_phys_mem_size = system.gpu.gpu_memory_range.size()

    # Device directory controllers are only built when requested; keep the
    # list defined either way so the cluster-assembly loop near the end of
    # this function does not hit a NameError when num_dev_dirs == 0.
    dev_dir_cntrls = []
    if options.num_dev_dirs > 0:
        mem_module_size = gpu_phys_mem_size / options.num_dev_dirs

        #
        # determine size and index bits for probe filter
        # By default, the probe filter size is configured to be twice the
        # size of the L2 cache.
        #
        pf_size = MemorySize(options.sc_l2_size)
        pf_size.value = pf_size.value * 2
        dir_bits = int(math.log(options.num_dev_dirs, 2))
        pf_bits = int(math.log(pf_size.value, 2))
        if options.numa_high_bit:
            if options.pf_on or options.dir_on:
                # if numa high bit explicitly set, make sure it does not overlap
                # with the probe filter index
                assert(options.numa_high_bit - dir_bits > pf_bits)

            # set the probe filter start bit to just above the block offset
            pf_start_bit = block_size_bits
        else:
            if dir_bits > 0:
                pf_start_bit = dir_bits + block_size_bits - 1
            else:
                pf_start_bit = block_size_bits

        dev_mem_ctrls = []
        # Device directories are numbered after the existing CPU directories.
        num_cpu_dirs = len(dir_cntrls)
        for i in xrange(options.num_dev_dirs):
            #
            # Create the Ruby objects associated with the directory controller
            #

            dir_version = i + num_cpu_dirs

            dir_size = MemorySize('0B')
            dir_size.value = mem_module_size

            pf = ProbeFilter(size = pf_size, assoc = 4,
                             start_index_bit = pf_start_bit)

            dev_dir_cntrl = Directory_Controller(version = dir_version,
                                 directory = \
                                 RubyDirectoryMemory( \
                                            version = dir_version,
                                            size = dir_size,
                                            numa_high_bit = \
                                            options.numa_high_bit,
                                            device_directory = True),
                                 probeFilter = pf,
                                 probe_filter_enabled = options.pf_on,
                                 full_bit_dir_enabled = options.dir_on,
                                 ruby_system = ruby_system)

            if options.recycle_latency:
                dev_dir_cntrl.recycle_latency = options.recycle_latency

            exec("ruby_system.dev_dir_cntrl%d = dev_dir_cntrl" % i)
            dev_dir_cntrls.append(dev_dir_cntrl)

            # Connect the directory controller to the network
            dev_dir_cntrl.forwardFromDir = ruby_system.network.slave
            dev_dir_cntrl.responseFromDir = ruby_system.network.slave
            dev_dir_cntrl.dmaResponseFromDir = ruby_system.network.slave

            dev_dir_cntrl.unblockToDir = ruby_system.network.master
            dev_dir_cntrl.responseToDir = ruby_system.network.master
            dev_dir_cntrl.requestToDir = ruby_system.network.master
            dev_dir_cntrl.dmaRequestToDir = ruby_system.network.master

            # One memory controller per device directory, interleaved over
            # the GPU memory range.
            dev_mem_ctrl = MemConfig.create_mem_ctrl(
                MemConfig.get(options.mem_type), system.gpu.gpu_memory_range,
                i, options.num_dev_dirs, int(math.log(options.num_dev_dirs, 2)),
                options.cacheline_size)
            dev_mem_ctrl.port = dev_dir_cntrl.memory
            dev_mem_ctrls.append(dev_mem_ctrl)

        system.dev_mem_ctrls = dev_mem_ctrls
    else:
        # Since there are no device directories, use CPU directories
        # Fix up the memory sizes of the CPU directories
        num_dirs = len(dir_cntrls)
        add_gpu_mem = gpu_phys_mem_size / num_dirs
        for cntrl in dir_cntrls:
            new_size = cntrl.directory.size.value + add_gpu_mem
            cntrl.directory.size.value = new_size

    #
    # Create controller for the copy engine to connect to in GPU cluster
    # Cache is unused by controller
    #
    cache = L1Cache(size = "4096B", assoc = 2)

    gpu_ce_seq = RubySequencer(version = options.num_cpus + options.num_sc + 1,
                               icache = cache,
                               dcache = cache,
                               max_outstanding_requests = 64,
                               support_inst_reqs = False,
                               ruby_system = ruby_system,
                               connect_to_io = False)

    gpu_ce_cntrl = GPUCopyDMA_Controller(version = 1,
                                  sequencer = gpu_ce_seq,
                                  number_of_TBEs = 256,
                                  ruby_system = ruby_system)

    ruby_system.l1_cntrl_ce = gpu_ce_cntrl

    all_sequencers.append(cpu_ce_seq)
    all_sequencers.append(gpu_ce_seq)

    gpu_ce_cntrl.responseFromDir = ruby_system.network.master
    gpu_ce_cntrl.reqToDirectory = ruby_system.network.slave

    # Assemble every controller into the top-level cluster for the topology.
    complete_cluster = Cluster(intBW = 32, extBW = 32)
    complete_cluster.add(cpu_ce_cntrl)
    complete_cluster.add(gpu_ce_cntrl)
    complete_cluster.add(cpu_cluster)
    complete_cluster.add(gpu_cluster)

    for cntrl in dir_cntrls:
        complete_cluster.add(cntrl)

    for cntrl in dev_dir_cntrls:
        complete_cluster.add(cntrl)

    for cntrl in dma_cntrls:
        complete_cluster.add(cntrl)

    for cluster in l2_clusters:
        complete_cluster.add(cluster)

    return (all_sequencers, dir_cntrls, complete_cluster)
Beispiel #30
0
Datei: fs.py Projekt: abusse/gem5
def build_test_system(np):
    """Construct and return the full-system "test" system for np CPUs.

    Dispatches on the compiled TARGET_ISA to build the base system, sets
    up voltage/clock domains, instantiates the CPUs, and wires up either a
    Ruby memory system or the classic caches/bridge, depending on options.
    Relies on script-level globals (options, bm, test_mem_mode,
    TestCPUClass, FutureClass, ...).
    """
    cmdline = cmd_line_template()
    if buildEnv['TARGET_ISA'] == "alpha":
        test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0], options.ruby,
                                        cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "mips":
        test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0], cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "sparc":
        test_sys = makeSparcSystem(test_mem_mode, bm[0], cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "x86":
        test_sys = makeLinuxX86System(test_mem_mode, options.num_cpus, bm[0],
                options.ruby, cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "arm":
        test_sys = makeArmSystem(test_mem_mode, options.machine_type,
                                 options.num_cpus, bm[0], options.dtb_filename,
                                 bare_metal=options.bare_metal,
                                 cmdline=cmdline,
                                 external_memory=options.external_memory_system)
        if options.enable_context_switch_stats_dump:
            test_sys.enable_context_switch_stats_dump = True
    else:
        fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA'])

    # Set the cache line size for the entire system
    test_sys.cache_line_size = options.cacheline_size

    # Create a top-level voltage domain
    test_sys.voltage_domain = VoltageDomain(voltage = options.sys_voltage)

    # Create a source clock for the system and set the clock period
    test_sys.clk_domain = SrcClockDomain(clock =  options.sys_clock,
            voltage_domain = test_sys.voltage_domain)

    # Create a CPU voltage domain
    test_sys.cpu_voltage_domain = VoltageDomain()

    # Create a source clock for the CPUs and set the clock period
    test_sys.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock,
                                             voltage_domain =
                                             test_sys.cpu_voltage_domain)

    if options.kernel is not None:
        test_sys.kernel = binary(options.kernel)

    if options.script is not None:
        # Script executed by the simulated system after boot.
        test_sys.readfile = options.script

    if options.lpae:
        test_sys.have_lpae = True

    if options.virtualisation:
        test_sys.have_virtualization = True

    test_sys.init_param = options.init_param

    # For now, assign all the CPUs to the same clock domain
    test_sys.cpu = [TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i,
                                 function_trace=options.enable_trace)
                    for i in xrange(np)]

    # KVM-based CPUs need a KvmVM object on the system.
    if is_kvm_cpu(TestCPUClass) or is_kvm_cpu(FutureClass):
        test_sys.vm = KvmVM()

    if options.ruby:
        # Check for timing mode because ruby does not support atomic accesses
        if not (options.cpu_type == "detailed" or options.cpu_type == "timing"):
            print >> sys.stderr, "Ruby requires TimingSimpleCPU or O3CPU!!"
            sys.exit(1)

        Ruby.create_system(options, True, test_sys, test_sys.iobus,
                           test_sys._dma_ports)

        # Create a separate clock domain for Ruby
        test_sys.ruby.clk_domain = SrcClockDomain(clock = options.ruby_clock,
                                        voltage_domain = test_sys.voltage_domain)

        # Connect the ruby io port to the PIO bus,
        # assuming that there is just one such port.
        test_sys.iobus.master = test_sys.ruby._io_port.slave

        for (i, cpu) in enumerate(test_sys.cpu):
            #
            # Tie the cpu ports to the correct ruby system ports
            #
            cpu.clk_domain = test_sys.cpu_clk_domain
            cpu.createThreads()
            cpu.createInterruptController()

            cpu.icache_port = test_sys.ruby._cpu_ports[i].slave
            cpu.dcache_port = test_sys.ruby._cpu_ports[i].slave

            if buildEnv['TARGET_ISA'] == "x86":
                # x86 additionally routes page-table walkers and the
                # interrupt controller through Ruby.
                cpu.itb.walker.port = test_sys.ruby._cpu_ports[i].slave
                cpu.dtb.walker.port = test_sys.ruby._cpu_ports[i].slave

                cpu.interrupts[0].pio = test_sys.ruby._cpu_ports[i].master
                cpu.interrupts[0].int_master = test_sys.ruby._cpu_ports[i].slave
                cpu.interrupts[0].int_slave = test_sys.ruby._cpu_ports[i].master

    else:
        if options.caches or options.l2cache:
            # By default the IOCache runs at the system clock
            test_sys.iocache = IOCache(addr_ranges = test_sys.mem_ranges)
            test_sys.iocache.cpu_side = test_sys.iobus.master
            test_sys.iocache.mem_side = test_sys.membus.slave
        elif not options.external_memory_system:
            # Without caches, bridge the I/O bus straight to the membus.
            test_sys.iobridge = Bridge(delay='50ns', ranges = test_sys.mem_ranges)
            test_sys.iobridge.slave = test_sys.iobus.master
            test_sys.iobridge.master = test_sys.membus.slave

        # Sanity check
        if options.fastmem:
            if TestCPUClass != AtomicSimpleCPU:
                fatal("Fastmem can only be used with atomic CPU!")
            if (options.caches or options.l2cache):
                fatal("You cannot use fastmem in combination with caches!")

        if options.simpoint_profile:
            if not options.fastmem:
                # Atomic CPU checked with fastmem option already
                fatal("SimPoint generation should be done with atomic cpu and fastmem")
            if np > 1:
                fatal("SimPoint generation not supported with more than one CPUs")

        for i in xrange(np):
            if options.fastmem:
                test_sys.cpu[i].fastmem = True
            if options.simpoint_profile:
                test_sys.cpu[i].addSimPointProbe(options.simpoint_interval)
            if options.checker:
                test_sys.cpu[i].addCheckerCpu()
            test_sys.cpu[i].createThreads()

        # If elastic tracing is enabled when not restoring from checkpoint and
        # when not fast forwarding using the atomic cpu, then check that the
        # TestCPUClass is DerivO3CPU or inherits from DerivO3CPU. If the check
        # passes then attach the elastic trace probe.
        # If restoring from checkpoint or fast forwarding, the code that does this for
        # FutureCPUClass is in the Simulation module. If the check passes then the
        # elastic trace probe is attached to the switch CPUs.
        if options.elastic_trace_en and options.checkpoint_restore == None and \
            not options.fast_forward:
            CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, options)

        CacheConfig.config_cache(options, test_sys)

        MemConfig.config_mem(options, test_sys)

    return test_sys
Beispiel #31
0
def addCommonOptions(parser):
    """Register the command-line options shared by the gem5 run scripts.

    Adds system, CPU-configuration, memory, cache, run-duration, SimPoint,
    checkpointing, CPU-switching and fast-forwarding options to *parser*
    (an optparse.OptionParser).  Mutates *parser* in place; returns None.

    NOTE(review): several options keep a commented-out ``nargs='+'`` and use
    string defaults such as "1" -- presumably the caller splits these strings
    itself to support per-CPU-type values; confirm against the call sites.
    """
    # system options
    parser.add_option("--list-cpu-types",
                      action="callback",
                      callback=_listCpuTypes,
                      help="List available CPU types")
    parser.add_option("--cpu-type",
                      type="choice",
                      default="atomic",
                      choices=CpuConfig.cpu_names(),
                      help="type of cpu to run with")
    # NOTE(review): presumably a comma/space-separated list of CPU types for
    # heterogeneous runs, paired with --num-cpus-types below -- confirm.
    parser.add_option("--cpus-types",
                      action="store",
                      type="string",
                      default="atomic")
    parser.add_option("--checker", action="store_true")
    parser.add_option(
        "--num-cpus-types",
        action="store",
        type="int",
        help=
        "If != 1, then all CPU related types must be specified. Each CPU type will have at least one diferent L2 cache"
    )
    parser.add_option("--num-cpus-eachtype",
                      action="store",
                      type="string",
                      default="1")  #, nargs='+')
    parser.add_option("--cpus-type-names",
                      action="store",
                      type="string",
                      default="big")  #, nargs='+')
    parser.add_option("--sys-voltage",
                      action="store",
                      type="string",
                      default='1.0V',
                      help="""Top-level voltage for blocks running at system
                      power supply""")
    parser.add_option("--sys-clock",
                      action="store",
                      type="string",
                      default='1GHz',
                      help="""Top-level clock for blocks running at system
                      speed""")
    parser.add_option("--cpu-clock",
                      action="store",
                      type="string",
                      default='2GHz',
                      help="Clock for blocks running at CPU speed")
    parser.add_option(
        "--cpu-voltage",
        action="store",
        type="string",
        default='1.0V',
        help="""Top-level voltage for blocks running at CPU power supply""")
    # Process technology node, used by power modelling (e.g. McPAT).
    parser.add_option("--tech-node",
                      action="store",
                      type="int",
                      default=65,
                      help="Technology node in nm")
    parser.add_option("--smt",
                      action="store_true",
                      default=False,
                      help="""
                      Only used if multiple programs are specified. If true,
                      then the number of threads per cpu is same as the
                      number of programs.""")
    parser.add_option("--elastic-trace-en",
                      action="store_true",
                      help="""Enable capture of data dependency and instruction
                      fetch traces using elastic trace probe.""")
    # Trace file paths input to trace probe in a capture simulation and input
    # to Trace CPU in a replay simulation
    parser.add_option("--inst-trace-file",
                      action="store",
                      type="string",
                      help="""Instruction fetch trace file input to
                      Elastic Trace probe in a capture simulation and
                      Trace CPU in a replay simulation""",
                      default="")
    parser.add_option("--data-trace-file",
                      action="store",
                      type="string",
                      help="""Data dependency trace file input to
                      Elastic Trace probe in a capture simulation and
		      Trace CPU in a replay simulation""",
                      default="")

    # NOTE(review): presumably disables McPAT power-model output -- confirm.
    parser.add_option("--no-mcpat", action="store_true", default=False)

    # CPU conf. options
    # Per-structure sizing knobs for the detailed (O3) CPU model.  String
    # typed (with nargs='+' commented out), presumably so one value per CPU
    # type can be given -- confirm how the caller splits them.
    parser.add_option(
        "--cpu-pipeline-width",
        action="store",
        type="string",  # nargs='+',
        default="8")
    parser.add_option(
        "--cpu-LQentries",
        action="store",
        type="string",  # nargs='+',
        default="32")
    parser.add_option(
        "--cpu-SQentries",
        action="store",
        type="string",  # nargs='+',
        default="32")
    parser.add_option(
        "--cpu-IQentries",
        action="store",
        type="string",  # nargs='+',
        default="64")
    parser.add_option(
        "--cpu-ROBentries",
        action="store",
        type="string",  # nargs='+',
        default="192")
    parser.add_option(
        "--cpu-numPhysIntRegs",
        action="store",
        type="string",  # nargs='+',
        default="256")
    parser.add_option(
        "--cpu-numPhysFloatRegs",
        action="store",
        type="string",  # nargs='+',
        default="256")
    parser.add_option(
        "--cpu-localPredictorSize",
        action="store",
        type="string",  # nargs='+',
        default="2048")
    parser.add_option(
        "--cpu-globalPredictorSize",
        action="store",
        type="string",  # nargs='+',
        default="8192")
    parser.add_option(
        "--cpu-choicePredictorSize",
        action="store",
        type="string",  # nargs='+',
        default="8192")
    parser.add_option(
        "--cpu-BTBEntries",
        action="store",
        type="string",  # nargs='+',
        default="4096")
    parser.add_option(
        "--cpu-RASSize",
        action="store",
        type="string",  # nargs='+',
        default="16")

    # Memory Options
    parser.add_option("--list-mem-types",
                      action="callback",
                      callback=_listMemTypes,
                      help="List available memory types")
    parser.add_option("--mem-type",
                      type="choice",
                      default="DDR3_1600_x64",
                      choices=MemConfig.mem_names(),
                      help="type of memory to use")
    parser.add_option("--mem-channels",
                      type="int",
                      default=1,
                      help="number of memory channels")
    parser.add_option("--mem-ranks",
                      type="int",
                      default=None,
                      help="number of memory ranks per channel")
    parser.add_option("--mem-size",
                      action="store",
                      type="string",
                      default="512MB",
                      help="Specify the physical memory size (single memory)")

    parser.add_option("-l", "--lpae", action="store_true")
    parser.add_option("-V", "--virtualisation", action="store_true")

    parser.add_option("--memchecker", action="store_true")

    # Cache Options
    parser.add_option("--external-memory-system",
                      type="string",
                      help="use external ports of this port_type for caches")
    parser.add_option("--tlm-memory",
                      type="string",
                      help="use external port for SystemC TLM cosimulation")
    parser.add_option("--fastmem", action="store_true")
    parser.add_option("--caches", action="store_true")
    parser.add_option("--num-dirs", type="int", default=1)
    parser.add_option("--num-l2caches", type="string",
                      default="1")  #, nargs='+')
    parser.add_option("--l1d-size", type="string",
                      default="32kB")  #, nargs='+')
    parser.add_option("--l1i-size", type="string",
                      default="16kB")  #, nargs='+')
    parser.add_option("--l2-size", type="string", default="1MB")  #, nargs='+')
    parser.add_option("--l1d-assoc", type="string", default="2")  #, nargs='+')
    parser.add_option("--l1i-assoc", type="string", default="2")  #, nargs='+')
    parser.add_option("--l2-assoc", type="string", default="8")  #, nargs='+')
    # NOTE: underscore (not hyphen) naming, unlike the other options here.
    parser.add_option("--cacheline_size", type="int", default=64)

    # Enable Ruby
    parser.add_option("--ruby", action="store_true")

    # Run duration options
    parser.add_option("-m", "--abs-max-tick", type="int", default=m5.MaxTick,
                      metavar="TICKS", help="Run to absolute simulated tick " \
                      "specified including ticks from a restored checkpoint")
    parser.add_option("--rel-max-tick", type="int", default=None,
                      metavar="TICKS", help="Simulate for specified number of" \
                      " ticks relative to the simulation start tick (e.g. if " \
                      "restoring a checkpoint)")
    parser.add_option("--maxtime", type="float", default=None,
                      help="Run to the specified absolute simulated time in " \
                      "seconds")
    parser.add_option("-I",
                      "--maxinsts",
                      action="store",
                      type="int",
                      default=None,
                      help="""Total number of instructions to
                                            simulate (default: run forever)""")
    parser.add_option("--work-item-id",
                      action="store",
                      type="int",
                      help="the specific work id for exit & checkpointing")
    parser.add_option("--num-work-ids",
                      action="store",
                      type="int",
                      help="Number of distinct work item types")
    parser.add_option("--work-begin-cpu-id-exit",
                      action="store",
                      type="int",
                      help="exit when work starts on the specified cpu")
    parser.add_option("--work-end-exit-count",
                      action="store",
                      type="int",
                      help="exit at specified work end count")
    parser.add_option("--work-begin-exit-count",
                      action="store",
                      type="int",
                      help="exit at specified work begin count")
    parser.add_option("--init-param",
                      action="store",
                      type="int",
                      default=0,
                      help="""Parameter available in simulation with m5
                              initparam""")
    parser.add_option("--initialize-only",
                      action="store_true",
                      default=False,
                      help="""Exit after initialization. Do not simulate time.
                              Useful when gem5 is run as a library.""")

    # Simpoint options
    parser.add_option("--simpoint-profile",
                      action="store_true",
                      help="Enable basic block profiling for SimPoints")
    parser.add_option("--simpoint-interval",
                      type="int",
                      default=10000000,
                      help="SimPoint interval in num of instructions")
    parser.add_option(
        "--take-simpoint-checkpoints",
        action="store",
        type="string",
        help="<simpoint file,weight file,interval-length,warmup-length>")
    parser.add_option("--restore-simpoint-checkpoint",
                      action="store_true",
                      help="restore from a simpoint checkpoint taken with " +
                      "--take-simpoint-checkpoints")

    # Checkpointing options
    ###Note that performing checkpointing via python script files will override
    ###checkpoint instructions built into binaries.
    parser.add_option(
        "--take-checkpoints",
        action="store",
        type="string",
        help="<M,N> take checkpoints at tick M and every N ticks thereafter")
    parser.add_option("--max-checkpoints",
                      action="store",
                      type="int",
                      help="the maximum number of checkpoints to drop",
                      default=5)
    parser.add_option("--checkpoint-dir",
                      action="store",
                      type="string",
                      help="Place all checkpoints in this absolute directory")
    parser.add_option("-r",
                      "--checkpoint-restore",
                      action="store",
                      type="int",
                      help="restore from checkpoint <N>")
    parser.add_option("--checkpoint-at-end",
                      action="store_true",
                      help="take a checkpoint at end of run")
    parser.add_option("--work-begin-checkpoint-count",
                      action="store",
                      type="int",
                      help="checkpoint at specified work begin count")
    parser.add_option("--work-end-checkpoint-count",
                      action="store",
                      type="int",
                      help="checkpoint at specified work end count")
    parser.add_option(
        "--work-cpus-checkpoint-count",
        action="store",
        type="int",
        help="checkpoint and exit when active cpu count is reached")
    parser.add_option("--restore-with-cpu",
                      action="store",
                      type="choice",
                      default="atomic",
                      choices=CpuConfig.cpu_names(),
                      help="cpu type for restoring from a checkpoint")

    # CPU Switching - default switch model goes from a checkpoint
    # to a timing simple CPU with caches to warm up, then to detailed CPU for
    # data measurement
    parser.add_option(
        "--repeat-switch",
        action="store",
        type="int",
        default=None,
        help="switch back and forth between CPUs with period <N>")
    parser.add_option(
        "-s",
        "--standard-switch",
        action="store",
        type="int",
        default=None,
        help="switch from timing to Detailed CPU after warmup period of <N>")
    # NOTE: optparse accepts type="str" as an alias for "string".
    parser.add_option("-p",
                      "--prog-interval",
                      type="str",
                      help="CPU Progress Interval")

    # Fastforwarding and simpoint related materials
    parser.add_option(
        "-W",
        "--warmup-insts",
        action="store",
        type="int",
        default=None,
        help="Warmup period in total instructions (requires --standard-switch)"
    )
    parser.add_option(
        "--bench",
        action="store",
        type="string",
        default=None,
        help="base names for --take-checkpoint and --checkpoint-restore")
    parser.add_option(
        "-F",
        "--fast-forward",
        action="store",
        type="string",
        default=None,
        help="Number of instructions to fast forward before switching")
    parser.add_option(
        "-S",
        "--simpoint",
        action="store_true",
        default=False,
        help="""Use workload simpoints as an instruction offset for
                --checkpoint-restore or --take-checkpoint.""")
    parser.add_option(
        "--at-instruction",
        action="store_true",
        default=False,
        help="""Treat value of --checkpoint-restore or --take-checkpoint as a
                number of instructions.""")
    parser.add_option(
        "--spec-input",
        default="ref",
        type="choice",
        choices=["ref", "test", "train", "smred", "mdred", "lgred"],
        help="Input set size for SPEC CPU2000 benchmarks.")
    parser.add_option("--arm-iset",
                      default="arm",
                      type="choice",
                      choices=["arm", "thumb", "aarch64"],
                      help="ARM instruction set.")
# ---------------------------------------------------------------------------
# Example #32 (score: 0)
# ---------------------------------------------------------------------------
def build_test_system(np):
    """Construct and return the full-system simulation object (test_sys).

    Builds an ISA-specific base system, creates voltage/clock domains
    (one independently controllable DVFS clock domain per core), creates
    the CPUs, enables the DVFS handler, and wires up either the Ruby or
    the classic memory system.

    Parameters:
        np -- number of CPUs requested on the command line.
              NOTE(review): the CPU list below is hard-coded to four
              cores (one per DVFS domain); `np` is only used by the
              classic (non-Ruby) configuration loop -- confirm this is
              intentional.

    Returns:
        The configured test_sys System object.

    Relies on globals set up by the enclosing script: options, bm,
    test_mem_mode, TestCPUClass, FutureClass, TestMemClass.
    """
    isa = buildEnv['TARGET_ISA']
    if isa == "alpha":
        test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0], options.ruby)
    elif isa == "mips":
        test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0])
    elif isa == "sparc":
        test_sys = makeSparcSystem(test_mem_mode, bm[0])
    elif isa == "x86":
        test_sys = makeLinuxX86System(test_mem_mode, options.num_cpus, bm[0],
                                      options.ruby)
    elif isa == "arm":
        test_sys = makeArmSystem(test_mem_mode,
                                 options.machine_type,
                                 bm[0],
                                 options.dtb_filename,
                                 bare_metal=options.bare_metal,
                                 sdcard_image=options.sdcard_image)
        if options.enable_context_switch_stats_dump:
            test_sys.enable_context_switch_stats_dump = True
    else:
        fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA'])

    # Set the cache line size for the entire system
    test_sys.cache_line_size = options.cacheline_size

    # Create a top-level voltage domain
    test_sys.voltage_domain = VoltageDomain(voltage=options.sys_voltage)

    # Create a source clock for the system and set the clock period
    test_sys.clk_domain = SrcClockDomain(
        clock=options.sys_clock, voltage_domain=test_sys.voltage_domain)

    # Create a clock running constantly at 1.4GHz for the L2
    test_sys.clk_domain_const = SrcClockDomain(
        clock=["1.4GHz"], voltage_domain=test_sys.voltage_domain)

    # Create a CPU voltage domain
    test_sys.cpu_voltage_domain = VoltageDomain()

    # DVFS operating points shared by all per-core clock domains; the
    # first entry is each domain's initial frequency.  Defined once so
    # the four domains cannot drift out of sync.
    dvfs_freqs = [
        "1.4GHz", "1.3GHz", "1.2GHz", "1.1GHz", "1GHz", "0.9GHz", "0.8GHz",
        "0.7GHz", "0.6GHz", "0.5GHz", "0.4GHz", "0.3GHz", "0.2GHz"
    ]

    # One independently controllable clock domain per core (ids 0-3).
    test_sys.cpu_clk_domain = SrcClockDomain(
        clock=dvfs_freqs,
        voltage_domain=test_sys.cpu_voltage_domain,
        domain_id=0)
    test_sys.cpu_clk_domain1 = SrcClockDomain(
        clock=dvfs_freqs,
        voltage_domain=test_sys.cpu_voltage_domain,
        domain_id=1)
    test_sys.cpu_clk_domain2 = SrcClockDomain(
        clock=dvfs_freqs,
        voltage_domain=test_sys.cpu_voltage_domain,
        domain_id=2)
    test_sys.cpu_clk_domain3 = SrcClockDomain(
        clock=dvfs_freqs,
        voltage_domain=test_sys.cpu_voltage_domain,
        domain_id=3)

    if options.kernel is not None:
        test_sys.kernel = binary(options.kernel)

    if options.script is not None:
        test_sys.readfile = options.script

    if options.lpae:
        test_sys.have_lpae = True

    if options.virtualisation:
        test_sys.have_virtualization = True

    test_sys.init_param = options.init_param

    # Four CPUs, one per DVFS clock domain / socket.
    test_sys.cpu = [
        TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=0,
                     socket_id=0),
        TestCPUClass(clk_domain=test_sys.cpu_clk_domain1,
                     cpu_id=1,
                     socket_id=1),
        TestCPUClass(clk_domain=test_sys.cpu_clk_domain2,
                     cpu_id=2,
                     socket_id=2),
        TestCPUClass(clk_domain=test_sys.cpu_clk_domain3,
                     cpu_id=3,
                     socket_id=3)
    ]

    if is_kvm_cpu(TestCPUClass) or is_kvm_cpu(FutureClass):
        test_sys.vm = KvmVM()

    # Enable DVFS over all four per-core domains.
    test_sys.dvfs_handler.enable = True
    test_sys.dvfs_handler.transform_enable = True  # We do want O3 CPU to transform
    test_sys.dvfs_handler.domains = [
        test_sys.cpu_clk_domain, test_sys.cpu_clk_domain1,
        test_sys.cpu_clk_domain2, test_sys.cpu_clk_domain3
    ]

    if options.ruby:
        # Check for timing mode because ruby does not support atomic accesses
        if not (options.cpu_type == "detailed"
                or options.cpu_type == "timing"):
            sys.stderr.write("Ruby requires TimingSimpleCPU or O3CPU!!\n")
            sys.exit(1)

        Ruby.create_system(options, test_sys, test_sys.iobus,
                           test_sys._dma_ports)

        # Create a separate clock domain for Ruby
        test_sys.ruby.clk_domain = SrcClockDomain(
            clock=options.ruby_clock, voltage_domain=test_sys.voltage_domain)

        for (i, cpu) in enumerate(test_sys.cpu):
            # Tie the cpu ports to the correct ruby system ports.
            # NOTE(review): this reassignment puts every CPU back into
            # cpu_clk_domain, overriding the per-core DVFS domains set
            # above -- confirm that is intended for Ruby runs.
            cpu.clk_domain = test_sys.cpu_clk_domain
            cpu.createThreads()
            cpu.createInterruptController()

            cpu.icache_port = test_sys.ruby._cpu_ports[i].slave
            cpu.dcache_port = test_sys.ruby._cpu_ports[i].slave

            if buildEnv['TARGET_ISA'] == "x86":
                # x86 page-table walkers and interrupt ports also go
                # through Ruby.
                cpu.itb.walker.port = test_sys.ruby._cpu_ports[i].slave
                cpu.dtb.walker.port = test_sys.ruby._cpu_ports[i].slave

                cpu.interrupts.pio = test_sys.ruby._cpu_ports[i].master
                cpu.interrupts.int_master = test_sys.ruby._cpu_ports[i].slave
                cpu.interrupts.int_slave = test_sys.ruby._cpu_ports[i].master

            test_sys.ruby._cpu_ports[i].access_phys_mem = True

        # Create the appropriate memory controllers
        # and connect them to the IO bus
        test_sys.mem_ctrls = [
            TestMemClass(range=r) for r in test_sys.mem_ranges
        ]
        for i in xrange(len(test_sys.mem_ctrls)):
            test_sys.mem_ctrls[i].port = test_sys.iobus.master

    else:
        if options.caches or options.l2cache:
            # By default the IOCache runs at the system clock
            test_sys.iocache = IOCache(addr_ranges=test_sys.mem_ranges)
            test_sys.iocache.cpu_side = test_sys.iobus.master
            test_sys.iocache.mem_side = test_sys.membus.slave
        else:
            # No caches: bridge I/O traffic straight onto the membus.
            test_sys.iobridge = Bridge(delay='50ns',
                                       ranges=test_sys.mem_ranges)
            test_sys.iobridge.slave = test_sys.iobus.master
            test_sys.iobridge.master = test_sys.membus.slave

        # Sanity check
        if options.fastmem:
            if TestCPUClass != AtomicSimpleCPU:
                fatal("Fastmem can only be used with atomic CPU!")
            if (options.caches or options.l2cache):
                fatal("You cannot use fastmem in combination with caches!")

        for i in xrange(np):
            if options.fastmem:
                test_sys.cpu[i].fastmem = True
            if options.checker:
                test_sys.cpu[i].addCheckerCpu()
            test_sys.cpu[i].createThreads()

        CacheConfig.config_cache(options, test_sys)
        MemConfig.config_mem(options, test_sys)

    return test_sys
# ---------------------------------------------------------------------------
# Example #33 (score: 0)
# ---------------------------------------------------------------------------
def create_system(options, full_system, system, dma_devices, ruby_system):

    if not buildEnv['GPGPU_SIM']:
        m5.util.panic(
            "This script requires GPGPU-Sim integration to be built.")

    # Run the protocol script to setup CPU cluster, directory and DMA
    (all_sequencers, dir_cntrls, dma_cntrls, cpu_cluster) = \
                                        VI_hammer.create_system(options,
                                                                full_system,
                                                                system,
                                                                dma_devices,
                                                                ruby_system)

    # If we're going to split the directories/memory controllers
    if options.num_dev_dirs > 0:
        cpu_cntrl_count = len(cpu_cluster)
    else:
        cpu_cntrl_count = len(cpu_cluster) + len(dir_cntrls)

    #
    # Create controller for the copy engine to connect to in CPU cluster
    # Cache is unused by controller
    #
    cache = L1Cache(size="4096B", assoc=2)

    cpu_ce_seq = RubySequencer(version=options.num_cpus + options.num_sc,
                               icache=cache,
                               dcache=cache,
                               max_outstanding_requests=64,
                               ruby_system=ruby_system,
                               connect_to_io=False)

    cpu_ce_cntrl = GPUCopyDMA_Controller(version=0,
                                         sequencer=cpu_ce_seq,
                                         number_of_TBEs=256,
                                         transitions_per_cycle=options.ports,
                                         ruby_system=ruby_system)

    cpu_ce_cntrl.responseFromDir = MessageBuffer(ordered=True)
    cpu_ce_cntrl.responseFromDir.slave = ruby_system.network.master
    cpu_ce_cntrl.reqToDirectory = MessageBuffer(ordered=True)
    cpu_ce_cntrl.reqToDirectory.master = ruby_system.network.slave

    cpu_ce_cntrl.mandatoryQueue = MessageBuffer()

    ruby_system.ce_cntrl = cpu_ce_cntrl

    cpu_cntrl_count += 1

    #
    # Build GPU cluster
    #
    gpu_cluster = Cluster(intBW=32, extBW=32)
    gpu_cluster.disableConnectToParent()

    l2_bits = int(math.log(options.num_l2caches, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))
    # This represents the L1 to L2 interconnect latency
    # NOTE! This latency is in Ruby (cache) cycles, not SM cycles
    per_hop_interconnect_latency = 45  # ~15 GPU cycles
    num_dance_hall_hops = int(math.log(options.num_sc, 2))
    if num_dance_hall_hops == 0:
        num_dance_hall_hops = 1
    l1_to_l2_noc_latency = per_hop_interconnect_latency * num_dance_hall_hops

    #
    # Caches for GPU cores
    #
    for i in xrange(options.num_sc):
        #
        # First create the Ruby objects associated with the GPU cores
        #
        cache = L1Cache(size=options.sc_l1_size,
                        assoc=options.sc_l1_assoc,
                        replacement_policy=LRUReplacementPolicy(),
                        start_index_bit=block_size_bits,
                        dataArrayBanks=4,
                        tagArrayBanks=4,
                        dataAccessLatency=4,
                        tagAccessLatency=4,
                        resourceStalls=False)

        l1_cntrl = GPUL1Cache_Controller(
            version=i,
            cache=cache,
            l2_select_num_bits=l2_bits,
            num_l2=options.num_l2caches,
            transitions_per_cycle=options.ports,
            issue_latency=l1_to_l2_noc_latency,
            number_of_TBEs=options.gpu_l1_buf_depth,
            ruby_system=ruby_system)

        gpu_seq = RubySequencer(
            version=options.num_cpus + i,
            icache=cache,
            dcache=cache,
            max_outstanding_requests=options.gpu_l1_buf_depth,
            ruby_system=ruby_system,
            deadlock_threshold=2000000,
            connect_to_io=False)

        l1_cntrl.sequencer = gpu_seq

        exec("ruby_system.l1_cntrl_sp%02d = l1_cntrl" % i)

        #
        # Add controllers and sequencers to the appropriate lists
        #
        all_sequencers.append(gpu_seq)
        gpu_cluster.add(l1_cntrl)

        # Connect the controller to the network
        l1_cntrl.requestFromL1Cache = MessageBuffer(ordered=True)
        l1_cntrl.requestFromL1Cache.master = ruby_system.network.slave
        l1_cntrl.responseToL1Cache = MessageBuffer(ordered=True)
        l1_cntrl.responseToL1Cache.slave = ruby_system.network.master

        l1_cntrl.mandatoryQueue = MessageBuffer()

    l2_index_start = block_size_bits + l2_bits
    # Use L2 cache and interconnect latencies to calculate protocol latencies
    # NOTE! These latencies are in Ruby (cache) cycles, not SM cycles
    l2_cache_access_latency = 30  # ~10 GPU cycles
    l2_to_l1_noc_latency = per_hop_interconnect_latency * num_dance_hall_hops
    l2_to_mem_noc_latency = 125  # ~40 GPU cycles

    l2_clusters = []
    for i in xrange(options.num_l2caches):
        #
        # First create the Ruby objects associated with this cpu
        #
        l2_cache = L2Cache(size=options.sc_l2_size,
                           assoc=options.sc_l2_assoc,
                           start_index_bit=l2_index_start,
                           replacement_policy=LRUReplacementPolicy(),
                           dataArrayBanks=4,
                           tagArrayBanks=4,
                           dataAccessLatency=4,
                           tagAccessLatency=4,
                           resourceStalls=options.gpu_l2_resource_stalls)

        l2_cntrl = GPUL2Cache_Controller(
            version=i,
            L2cache=l2_cache,
            transitions_per_cycle=options.ports,
            l2_response_latency=l2_cache_access_latency + l2_to_l1_noc_latency,
            l2_request_latency=l2_to_mem_noc_latency,
            cache_response_latency=l2_cache_access_latency,
            ruby_system=ruby_system)

        exec("ruby_system.l2_cntrl%d = l2_cntrl" % i)
        l2_cluster = Cluster(intBW=32, extBW=32)
        l2_cluster.add(l2_cntrl)
        gpu_cluster.add(l2_cluster)
        l2_clusters.append(l2_cluster)

        # Connect the controller to the network
        l2_cntrl.responseToL1Cache = MessageBuffer(ordered=True)
        l2_cntrl.responseToL1Cache.master = ruby_system.network.slave
        l2_cntrl.requestFromCache = MessageBuffer()
        l2_cntrl.requestFromCache.master = ruby_system.network.slave
        l2_cntrl.responseFromCache = MessageBuffer()
        l2_cntrl.responseFromCache.master = ruby_system.network.slave
        l2_cntrl.unblockFromCache = MessageBuffer()
        l2_cntrl.unblockFromCache.master = ruby_system.network.slave

        l2_cntrl.requestFromL1Cache = MessageBuffer(ordered=True)
        l2_cntrl.requestFromL1Cache.slave = ruby_system.network.master
        l2_cntrl.forwardToCache = MessageBuffer()
        l2_cntrl.forwardToCache.slave = ruby_system.network.master
        l2_cntrl.responseToCache = MessageBuffer()
        l2_cntrl.responseToCache.slave = ruby_system.network.master

        l2_cntrl.triggerQueue = MessageBuffer()

    gpu_phys_mem_size = system.gpu.gpu_memory_range.size()

    if options.num_dev_dirs > 0:
        mem_module_size = gpu_phys_mem_size / options.num_dev_dirs

        #
        # determine size and index bits for probe filter
        # By default, the probe filter size is configured to be twice the
        # size of the L2 cache.
        #
        pf_size = MemorySize(options.sc_l2_size)
        pf_size.value = pf_size.value * 2
        dir_bits = int(math.log(options.num_dev_dirs, 2))
        pf_bits = int(math.log(pf_size.value, 2))
        if options.numa_high_bit:
            if options.pf_on or options.dir_on:
                # if numa high bit explicitly set, make sure it does not overlap
                # with the probe filter index
                assert (options.numa_high_bit - dir_bits > pf_bits)

            # set the probe filter start bit to just above the block offset
            pf_start_bit = block_size_bits
        else:
            if dir_bits > 0:
                pf_start_bit = dir_bits + block_size_bits - 1
            else:
                pf_start_bit = block_size_bits

        dev_dir_cntrls = []
        dev_mem_ctrls = []
        num_cpu_dirs = len(dir_cntrls)
        for i in xrange(options.num_dev_dirs):
            #
            # Create the Ruby objects associated with the directory controller
            #

            dir_version = i + num_cpu_dirs

            dir_size = MemorySize('0B')
            dir_size.value = mem_module_size

            pf = ProbeFilter(size=pf_size,
                             assoc=4,
                             start_index_bit=pf_start_bit)

            dev_dir_cntrl = Directory_Controller(version = dir_version,
                                 directory = \
                                 RubyDirectoryMemory( \
                                            version = dir_version,
                                            size = dir_size,
                                            numa_high_bit = \
                                            options.numa_high_bit,
                                            device_directory = True),
                                 probeFilter = pf,
                                 probe_filter_enabled = options.pf_on,
                                 full_bit_dir_enabled = options.dir_on,
                                 transitions_per_cycle = options.ports,
                                 ruby_system = ruby_system)

            if options.recycle_latency:
                dev_dir_cntrl.recycle_latency = options.recycle_latency

            exec("ruby_system.dev_dir_cntrl%d = dev_dir_cntrl" % i)
            dev_dir_cntrls.append(dev_dir_cntrl)

            # Connect the directory controller to the network
            dev_dir_cntrl.forwardFromDir = MessageBuffer()
            dev_dir_cntrl.forwardFromDir.master = ruby_system.network.slave
            dev_dir_cntrl.responseFromDir = MessageBuffer()
            dev_dir_cntrl.responseFromDir.master = ruby_system.network.slave
            dev_dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
            dev_dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave

            dev_dir_cntrl.unblockToDir = MessageBuffer()
            dev_dir_cntrl.unblockToDir.slave = ruby_system.network.master
            dev_dir_cntrl.responseToDir = MessageBuffer()
            dev_dir_cntrl.responseToDir.slave = ruby_system.network.master
            dev_dir_cntrl.requestToDir = MessageBuffer()
            dev_dir_cntrl.requestToDir.slave = ruby_system.network.master
            dev_dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
            dev_dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master

            dev_dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
            dev_dir_cntrl.responseFromMemory = MessageBuffer()

            dev_mem_ctrl = MemConfig.create_mem_ctrl(
                MemConfig.get(options.mem_type),
                system.gpu.gpu_memory_range, i, options.num_dev_dirs,
                int(math.log(options.num_dev_dirs, 2)), options.cacheline_size)
            dev_mem_ctrl.port = dev_dir_cntrl.memory
            dev_mem_ctrls.append(dev_mem_ctrl)

        system.dev_mem_ctrls = dev_mem_ctrls
    else:
        # Since there are no device directories, use CPU directories
        # Fix up the memory sizes of the CPU directories
        num_dirs = len(dir_cntrls)
        add_gpu_mem = gpu_phys_mem_size / num_dirs
        for cntrl in dir_cntrls:
            new_size = cntrl.directory.size.value + add_gpu_mem
            cntrl.directory.size.value = new_size

    #
    # Create controller for the copy engine to connect to in GPU cluster
    # Cache is unused by controller
    #
    cache = L1Cache(size="4096B", assoc=2)

    gpu_ce_seq = RubySequencer(version=options.num_cpus + options.num_sc + 1,
                               icache=cache,
                               dcache=cache,
                               max_outstanding_requests=64,
                               support_inst_reqs=False,
                               ruby_system=ruby_system,
                               connect_to_io=False)

    gpu_ce_cntrl = GPUCopyDMA_Controller(version=1,
                                         sequencer=gpu_ce_seq,
                                         number_of_TBEs=256,
                                         transitions_per_cycle=options.ports,
                                         ruby_system=ruby_system)

    ruby_system.dev_ce_cntrl = gpu_ce_cntrl

    all_sequencers.append(cpu_ce_seq)
    all_sequencers.append(gpu_ce_seq)

    gpu_ce_cntrl.responseFromDir = MessageBuffer(ordered=True)
    gpu_ce_cntrl.responseFromDir.slave = ruby_system.network.master
    gpu_ce_cntrl.reqToDirectory = MessageBuffer(ordered=True)
    gpu_ce_cntrl.reqToDirectory.master = ruby_system.network.slave

    gpu_ce_cntrl.mandatoryQueue = MessageBuffer()

    complete_cluster = Cluster(intBW=32, extBW=32)
    complete_cluster.add(cpu_ce_cntrl)
    complete_cluster.add(gpu_ce_cntrl)
    complete_cluster.add(cpu_cluster)
    complete_cluster.add(gpu_cluster)

    for cntrl in dir_cntrls:
        complete_cluster.add(cntrl)

    for cntrl in dev_dir_cntrls:
        complete_cluster.add(cntrl)

    for cntrl in dma_cntrls:
        complete_cluster.add(cntrl)

    for cluster in l2_clusters:
        complete_cluster.add(cluster)

    return (all_sequencers, dir_cntrls, complete_cluster)
Beispiel #34
0
def build_test_system(np):
    """Build and return the full-system (FS) gem5 System under test.

    np -- number of CPUs to instantiate.

    NOTE(review): this function reads many names from the enclosing
    script scope (options, bm, test_mem_mode, TestCPUClass, FutureClass,
    cmd_line_template, buildEnv, ...), so it can only run inside the
    configuration script that defines them.
    """
    cmdline = cmd_line_template()
    # Select the ISA-specific system factory; each make*System call
    # wires up the platform devices for that architecture.
    if buildEnv['TARGET_ISA'] == "alpha":
        test_sys = makeLinuxAlphaSystem(test_mem_mode,
                                        bm[0],
                                        options.ruby,
                                        cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "mips":
        test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0], cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "sparc":
        test_sys = makeSparcSystem(test_mem_mode, bm[0], cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "x86":
        test_sys = makeLinuxX86System(test_mem_mode,
                                      options.num_cpus,
                                      bm[0],
                                      options.ruby,
                                      cmdline=cmdline)
    elif buildEnv['TARGET_ISA'] == "arm":
        test_sys = makeArmSystem(
            test_mem_mode,
            options.machine_type,
            options.num_cpus,
            bm[0],
            options.dtb_filename,
            bare_metal=options.bare_metal,
            cmdline=cmdline,
            external_memory=options.external_memory_system)
        if options.enable_context_switch_stats_dump:
            test_sys.enable_context_switch_stats_dump = True
    else:
        fatal("Incapable of building %s full system!", buildEnv['TARGET_ISA'])

    # Set the cache line size for the entire system
    test_sys.cache_line_size = options.cacheline_size

    # Create a top-level voltage domain
    test_sys.voltage_domain = VoltageDomain(voltage=options.sys_voltage)

    # Create a source clock for the system and set the clock period
    test_sys.clk_domain = SrcClockDomain(
        clock=options.sys_clock, voltage_domain=test_sys.voltage_domain)

    # Create a CPU voltage domain
    test_sys.cpu_voltage_domain = VoltageDomain()

    # Create a source clock for the CPUs and set the clock period
    test_sys.cpu_clk_domain = SrcClockDomain(
        clock=options.cpu_clock, voltage_domain=test_sys.cpu_voltage_domain)

    if options.kernel is not None:
        test_sys.kernel = binary(options.kernel)

    if options.script is not None:
        test_sys.readfile = options.script

    if options.lpae:
        test_sys.have_lpae = True

    if options.virtualisation:
        test_sys.have_virtualization = True

    # Override the default bootloader with the one supplied on the
    # command line (options.issd_bootloader).
    #change the bootloader here
    #print "change boot loader"
    #print test_sys.boot_loader
    test_sys.boot_loader = options.issd_bootloader
    #print test_sys.boot_loader

    test_sys.init_param = options.init_param

    # For now, assign all the CPUs to the same clock domain
    test_sys.cpu = [
        TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i)
        for i in xrange(np)
    ]

    # KVM-based CPUs need a KvmVM object attached to the system.
    if is_kvm_cpu(TestCPUClass) or is_kvm_cpu(FutureClass):
        test_sys.vm = KvmVM()

    if options.ruby:
        # Check for timing mode because ruby does not support atomic accesses
        if not (options.cpu_type == "detailed"
                or options.cpu_type == "timing"):
            print >> sys.stderr, "Ruby requires TimingSimpleCPU or O3CPU!!"
            sys.exit(1)

        # Instantiate the Ruby memory system inside test_sys.
        Ruby.create_system(options, True, test_sys, test_sys.iobus,
                           test_sys._dma_ports)

        # Create a seperate clock domain for Ruby
        test_sys.ruby.clk_domain = SrcClockDomain(
            clock=options.ruby_clock, voltage_domain=test_sys.voltage_domain)

        # Connect the ruby io port to the PIO bus,
        # assuming that there is just one such port.
        test_sys.iobus.master = test_sys.ruby._io_port.slave

        for (i, cpu) in enumerate(test_sys.cpu):
            #
            # Tie the cpu ports to the correct ruby system ports
            #
            cpu.clk_domain = test_sys.cpu_clk_domain
            cpu.createThreads()
            cpu.createInterruptController()

            cpu.icache_port = test_sys.ruby._cpu_ports[i].slave
            cpu.dcache_port = test_sys.ruby._cpu_ports[i].slave

            # Only x86 routes TLB-walker and interrupt message ports
            # through the Ruby sequencer port.
            if buildEnv['TARGET_ISA'] == "x86":
                cpu.itb.walker.port = test_sys.ruby._cpu_ports[i].slave
                cpu.dtb.walker.port = test_sys.ruby._cpu_ports[i].slave

                cpu.interrupts.pio = test_sys.ruby._cpu_ports[i].master
                cpu.interrupts.int_master = test_sys.ruby._cpu_ports[i].slave
                cpu.interrupts.int_slave = test_sys.ruby._cpu_ports[i].master

    else:
        # Classic memory system: device traffic reaches memory either
        # through an IOCache (when caches are enabled) or a plain Bridge.
        if options.caches or options.l2cache:
            # By default the IOCache runs at the system clock
            test_sys.iocache = IOCache(addr_ranges=test_sys.mem_ranges)
            test_sys.iocache.cpu_side = test_sys.iobus.master
            test_sys.iocache.mem_side = test_sys.membus.slave
        elif not options.external_memory_system:
            test_sys.iobridge = Bridge(delay='50ns',
                                       ranges=test_sys.mem_ranges)
            test_sys.iobridge.slave = test_sys.iobus.master
            test_sys.iobridge.master = test_sys.membus.slave

        # Sanity check
        if options.fastmem:
            if TestCPUClass != AtomicSimpleCPU:
                fatal("Fastmem can only be used with atomic CPU!")
            if (options.caches or options.l2cache):
                fatal("You cannot use fastmem in combination with caches!")

        if options.simpoint_profile:
            if not options.fastmem:
                # Atomic CPU checked with fastmem option already
                fatal(
                    "SimPoint generation should be done with atomic cpu and fastmem"
                )
            if np > 1:
                fatal(
                    "SimPoint generation not supported with more than one CPUs"
                )

        for i in xrange(np):
            if options.fastmem:
                test_sys.cpu[i].fastmem = True
            if options.simpoint_profile:
                test_sys.cpu[i].addSimPointProbe(options.simpoint_interval)
            if options.checker:
                test_sys.cpu[i].addCheckerCpu()
            test_sys.cpu[i].createThreads()

        CacheConfig.config_cache(options, test_sys)
        MemConfig.config_mem(options, test_sys)

    return test_sys
Beispiel #35
0
def addCommonOptions(parser):
    """Register the command-line options shared by the gem5 run scripts.

    Adds the system, memory, cache, Ruby, run-duration, SimPoint,
    checkpointing, CPU-switching and fast-forwarding option groups to
    *parser* (an optparse.OptionParser).  Uses the CpuConfig/MemConfig
    helpers and the _listCpuTypes/_listMemTypes callbacks defined
    elsewhere in this file, so it must be called from the configuration
    script that imports them.
    """
    # system options
    parser.add_option("--list-cpu-types",
                      action="callback", callback=_listCpuTypes,
                      help="List available CPU types")
    parser.add_option("--cpu-type", type="choice", default="atomic",
                      choices=CpuConfig.cpu_names(),
                      help = "type of cpu to run with")
    parser.add_option("--checker", action="store_true")
    parser.add_option("-n", "--num-cpus", type="int", default=1)
    parser.add_option("--sys-voltage", action="store", type="string",
                      default='1.0V',
                      help = """Top-level voltage for blocks running at system
                      power supply""")
    parser.add_option("--sys-clock", action="store", type="string",
                      default='1GHz',
                      help = """Top-level clock for blocks running at system
                      speed""")
    parser.add_option("--cpu-clock", action="store", type="string",
                      default='2GHz',
                      help="Clock for blocks running at CPU speed")
    parser.add_option("--smt", action="store_true", default=False,
                      help = """
                      Only used if multiple programs are specified. If true,
                      then the number of threads per cpu is same as the
                      number of programs.""")

    # Memory Options
    parser.add_option("--list-mem-types",
                      action="callback", callback=_listMemTypes,
                      help="List available memory types")
    parser.add_option("--mem-type", type="choice", default="DDR3_1600_x64",
                      choices=MemConfig.mem_names(),
                      help = "type of memory to use")
    parser.add_option("--mem-channels", type="int", default=1,
                      help = "number of memory channels")
    parser.add_option("--mem-ranks", type="int", default=None,
                      help = "number of memory ranks per channel")
    parser.add_option("--mem-size", action="store", type="string",
                      default="512MB",
                      help="Specify the physical memory size (single memory)")

    parser.add_option("-l", "--lpae", action="store_true")
    parser.add_option("-V", "--virtualisation", action="store_true")

    parser.add_option("--memchecker", action="store_true")

    # Cache Options
    parser.add_option("--external-memory-system", type="string",
                      help="use external ports of this port_type for caches")
    parser.add_option("--tlm-memory", type="string",
                      help="use external port for SystemC TLM cosimulation")
    parser.add_option("--caches", action="store_true")
    parser.add_option("--l2cache", action="store_true")
    parser.add_option("--fastmem", action="store_true")
    parser.add_option("--num-dirs", type="int", default=1)
    parser.add_option("--num-l2caches", type="int", default=1)
    parser.add_option("--num-l3caches", type="int", default=1)
    parser.add_option("--l1d_size", type="string", default="64kB")
    parser.add_option("--l1i_size", type="string", default="32kB")
    parser.add_option("--l2_size", type="string", default="2MB")
    parser.add_option("--l3_size", type="string", default="16MB")
    parser.add_option("--l1d_assoc", type="int", default=2)
    parser.add_option("--l1i_assoc", type="int", default=2)
    parser.add_option("--l2_assoc", type="int", default=8)
    parser.add_option("--l3_assoc", type="int", default=16)
    parser.add_option("--l2_tags", type="string", default="LRU")
    parser.add_option("--cacheline_size", type="int", default=64)

    # Enable Ruby
    parser.add_option("--ruby", action="store_true")

    # Run duration options
    parser.add_option("-m", "--abs-max-tick", type="int", default=m5.MaxTick,
                      metavar="TICKS", help="Run to absolute simulated tick " \
                      "specified including ticks from a restored checkpoint")
    parser.add_option("--rel-max-tick", type="int", default=None,
                      metavar="TICKS", help="Simulate for specified number of" \
                      " ticks relative to the simulation start tick (e.g. if " \
                      "restoring a checkpoint)")
    parser.add_option("--maxtime", type="float", default=None,
                      help="Run to the specified absolute simulated time in " \
                      "seconds")
    parser.add_option("-I", "--maxinsts", action="store", type="int",
                      default=None, help="""Total number of instructions to
                                            simulate (default: run forever)""")
    parser.add_option("--work-item-id", action="store", type="int",
                      help="the specific work id for exit & checkpointing")
    parser.add_option("--num-work-ids", action="store", type="int",
                      help="Number of distinct work item types")
    parser.add_option("--work-begin-cpu-id-exit", action="store", type="int",
                      help="exit when work starts on the specified cpu")
    parser.add_option("--work-end-exit-count", action="store", type="int",
                      help="exit at specified work end count")
    parser.add_option("--work-begin-exit-count", action="store", type="int",
                      help="exit at specified work begin count")
    parser.add_option("--init-param", action="store", type="int", default=0,
                      help="""Parameter available in simulation with m5
                              initparam""")
    parser.add_option("--initialize-only", action="store_true", default=False,
                      help="""Exit after initialization. Do not simulate time.
                              Useful when gem5 is run as a library.""")

    # Simpoint options
    parser.add_option("--simpoint-profile", action="store_true",
                      help="Enable basic block profiling for SimPoints")
    parser.add_option("--simpoint-interval", type="int", default=10000000,
                      help="SimPoint interval in num of instructions")
    parser.add_option("--take-simpoint-checkpoints", action="store", type="string",
        help="<simpoint file,weight file,interval-length,warmup-length>")
    parser.add_option("--restore-simpoint-checkpoint", action="store_true",
        help="restore from a simpoint checkpoint taken with " +
             "--take-simpoint-checkpoints")

    # Checkpointing options
    ###Note that performing checkpointing via python script files will override
    ###checkpoint instructions built into binaries.
    parser.add_option("--take-checkpoints", action="store", type="string",
        help="<M,N> take checkpoints at tick M and every N ticks thereafter")
    parser.add_option("--max-checkpoints", action="store", type="int",
        help="the maximum number of checkpoints to drop", default=5)
    parser.add_option("--checkpoint-dir", action="store", type="string",
        help="Place all checkpoints in this absolute directory")
    parser.add_option("-r", "--checkpoint-restore", action="store", type="int",
        help="restore from checkpoint <N>")
    parser.add_option("--checkpoint-at-end", action="store_true",
                      help="take a checkpoint at end of run")
    parser.add_option("--work-begin-checkpoint-count", action="store", type="int",
                      help="checkpoint at specified work begin count")
    parser.add_option("--work-end-checkpoint-count", action="store", type="int",
                      help="checkpoint at specified work end count")
    parser.add_option("--work-cpus-checkpoint-count", action="store", type="int",
                      help="checkpoint and exit when active cpu count is reached")
    parser.add_option("--restore-with-cpu", action="store", type="choice",
                      default="atomic", choices=CpuConfig.cpu_names(),
                      help = "cpu type for restoring from a checkpoint")


    # CPU Switching - default switch model goes from a checkpoint
    # to a timing simple CPU with caches to warm up, then to detailed CPU for
    # data measurement
    parser.add_option("--repeat-switch", action="store", type="int",
        default=None,
        help="switch back and forth between CPUs with period <N>")
    parser.add_option("-s", "--standard-switch", action="store", type="int",
        default=None,
        help="switch from timing to Detailed CPU after warmup period of <N>")
    # BUG FIX: optparse only knows the type name "string"; type="str"
    # raises OptionError("invalid option type") when this line executes.
    parser.add_option("-p", "--prog-interval", type="string",
        help="CPU Progress Interval")
    parser.add_option("--two-phase", action="store",
                      help="switch from atomic to timing CPU after kernel boots")

    # Fastforwarding and simpoint related materials
    parser.add_option("-W", "--warmup-insts", action="store", type="int",
        default=None,
        help="Warmup period in total instructions (requires --standard-switch)")
    parser.add_option("--bench", action="store", type="string", default=None,
        help="base names for --take-checkpoint and --checkpoint-restore")
    parser.add_option("-F", "--fast-forward", action="store", type="string",
        default=None,
        help="Number of instructions to fast forward before switching")
    parser.add_option("-S", "--simpoint", action="store_true", default=False,
        help="""Use workload simpoints as an instruction offset for
                --checkpoint-restore or --take-checkpoint.""")
    parser.add_option("--at-instruction", action="store_true", default=False,
        help="""Treat value of --checkpoint-restore or --take-checkpoint as a
                number of instructions.""")
    parser.add_option("--spec-input", default="ref", type="choice",
                      choices=["ref", "test", "train", "smred", "mdred",
                               "lgred"],
                      help="Input set size for SPEC CPU2000 benchmarks.")
    parser.add_option("--arm-iset", default="arm", type="choice",
                      choices=["arm", "thumb", "aarch64"],
                      help="ARM instruction set.")
Beispiel #36
0
    # NOTE(review): fragment — this indented loop reads like the body of a
    # Ruby-enabled branch whose `if` is not visible in this snippet; the
    # top-level `else:` below pairs with that missing `if`.  Code is left
    # byte-identical.
    for i in xrange(np):
        ruby_port = system.ruby._cpu_ports[i]

        # Create the interrupt controller and connect its ports to Ruby
        # Note that the interrupt controller is always present but only
        # in x86 does it have message ports that need to be connected
        system.cpu[i].createInterruptController()

        # Connect the cpu's cache ports to Ruby
        system.cpu[i].icache_port = ruby_port.slave
        system.cpu[i].dcache_port = ruby_port.slave
        # x86 additionally routes interrupt message ports and TLB-walker
        # ports through the Ruby sequencer port.
        if buildEnv['TARGET_ISA'] == 'x86':
            system.cpu[i].interrupts.pio = ruby_port.master
            system.cpu[i].interrupts.int_master = ruby_port.slave
            system.cpu[i].interrupts.int_slave = ruby_port.master
            system.cpu[i].itb.walker.port = ruby_port.slave
            system.cpu[i].dtb.walker.port = ruby_port.slave
else: ### THIS IS WHERE WE END UP ###
    # Classic (non-Ruby) memory system: coherent crossbar plus the
    # cache/memory hierarchy from CacheConfig/MemConfig.
    MemClass = Simulation.setMemClass(options)
    system.membus = CoherentXBar()
    system.system_port = system.membus.slave
    CacheConfig.config_cache(options, system)
    MemConfig.config_mem(options, system)


# Instantiate the root object and hand control to the simulation loop.
root = Root(full_system = False, system = system)
Simulation.run(options, root, system, FutureClass)



Beispiel #37
0
# Make the in-tree configs/common helpers importable from the repository
# root.  NOTE(review): assumes addToPath, os and argparse were imported
# earlier in this script — confirm against the full file.
addToPath(os.getcwd() + '/configs/common')
import MemConfig

# This script aims at triggering low power state transitions in the DRAM
# controller. The traffic generator is used in DRAM mode and traffic
# states target a different levels of bank utilization and strides.
# At the end after sweeping through bank utilization and strides, we go
# through an idle state with no requests to enforce self-refresh.

parser = argparse.ArgumentParser(
  formatter_class=argparse.ArgumentDefaultsHelpFormatter)

# Use a single-channel DDR4-2400 in 16x4 configuration by default
parser.add_argument("--mem-type", default="DDR4_2400_16x4",
                    choices=MemConfig.mem_names(),
                    help = "type of memory to use")

parser.add_argument("--mem-ranks", "-r", type=int, default=1,
                    help = "Number of ranks to iterate across")

parser.add_argument("--page-policy", "-p",
                    choices=["close_adaptive", "open_adaptive"],
                    default="close_adaptive", help="controller page policy")

# itt = inter-transaction time multipliers, sweep values supplied as a
# single space-separated string.
parser.add_argument("--itt-list", "-t", default="1 20 100",
                    help="a list of multipliers for the max value of itt, " \
                    "e.g. \"1 20 100\"")

parser.add_argument("--rd-perc", type=int, default=100,
                    help = "Percentage of read commands")
Beispiel #38
0
def addCommonOptions(parser):
    """Register the command-line options shared by the gem5 run scripts.

    This variant adds multi-memory/PCM options (--mem-numbers,
    --memsize-list, --is-pcm, --pcm) on top of the usual system, cache,
    Ruby, run-duration, SimPoint, checkpointing and CPU-switching
    groups.  Uses the CpuConfig/MemConfig helpers and the
    _listCpuTypes/_listMemTypes callbacks defined elsewhere in this
    file.
    """
    # system options
    parser.add_option("--list-cpu-types",
                      action="callback",
                      callback=_listCpuTypes,
                      help="List available CPU types")
    parser.add_option("--cpu-type",
                      type="choice",
                      default="atomic",
                      choices=CpuConfig.cpu_names(),
                      help="type of cpu to run with")
    parser.add_option("--checker", action="store_true")
    parser.add_option("-n", "--num-cpus", type="int", default=1)
    parser.add_option("--sys-voltage",
                      action="store",
                      type="string",
                      default='1.0V',
                      help="""Top-level voltage for blocks running at system
                      power supply""")
    parser.add_option("--sys-clock",
                      action="store",
                      type="string",
                      default='1GHz',
                      help="""Top-level clock for blocks running at system
                      speed""")
    parser.add_option("--cpu-clock",
                      action="store",
                      type="string",
                      default='2GHz',
                      help="Clock for blocks running at CPU speed")
    parser.add_option("--smt",
                      action="store_true",
                      default=False,
                      help="""
                      Only used if multiple programs are specified. If true,
                      then the number of threads per cpu is same as the
                      number of programs.""")

    # Memory Options
    parser.add_option("--list-mem-types",
                      action="callback",
                      callback=_listMemTypes,
                      help="List available memory types")
    parser.add_option("--mem-type",
                      type="choice",
                      default="ddr3_1600_x64",
                      choices=MemConfig.mem_names(),
                      help="type of memory to use")
    parser.add_option("--mem-channels",
                      type="int",
                      default=1,
                      help="number of memory channels")

    parser.add_option("--mem-numbers",
                      type="int",
                      default=1,
                      help="number of memorys")
    parser.add_option("--memsize-list",
                      type="string",
                      default="default",
                      help="set size for each memory by a list")  #### fixme
    parser.add_option("--is-pcm",
                      type="string",
                      default="default",
                      help="set is_pcm for each memory by a list")  #### fixme

    #    parser.add_option("--pcm-numb", type="int", default=0,
    #                      help = "number of pcm")
    parser.add_option(
        "--mem-size",
        action="store",
        type="string",
        help="Specify the physical memory size at one time")  #### fixme

    parser.add_option("-l", "--lpae", action="store_true")
    parser.add_option("-V", "--virtualisation", action="store_true")
    parser.add_option("-P", "--pcm", action="store_true", help="Using PCM")

    # Cache Options
    parser.add_option("--caches", action="store_true")
    parser.add_option("--l2cache", action="store_true")
    parser.add_option("--fastmem", action="store_true")
    parser.add_option("--num-dirs", type="int", default=1)
    parser.add_option("--num-l2caches", type="int", default=1)
    parser.add_option("--num-l3caches", type="int", default=1)
    parser.add_option("--l1d_size", type="string", default="64kB")
    parser.add_option("--l1i_size", type="string", default="32kB")
    parser.add_option("--l2_size", type="string", default="2MB")
    parser.add_option("--l3_size", type="string", default="16MB")
    parser.add_option("--l1d_assoc", type="int", default=2)
    parser.add_option("--l1i_assoc", type="int", default=2)
    parser.add_option("--l2_assoc", type="int", default=8)
    parser.add_option("--l3_assoc", type="int", default=16)
    parser.add_option("--cacheline_size", type="int", default=64)

    # Enable Ruby
    parser.add_option("--ruby", action="store_true")

    # Run duration options
    parser.add_option("-m", "--abs-max-tick", type="int", default=m5.MaxTick,
                      metavar="TICKS", help="Run to absolute simulated tick " \
                      "specified including ticks from a restored checkpoint")
    parser.add_option("--rel-max-tick", type="int", default=None,
                      metavar="TICKS", help="Simulate for specified number of" \
                      " ticks relative to the simulation start tick (e.g. if " \
                      "restoring a checkpoint)")
    parser.add_option("--maxtime", type="float", default=None,
                      help="Run to the specified absolute simulated time in " \
                      "seconds")
    parser.add_option("-I",
                      "--maxinsts",
                      action="store",
                      type="int",
                      default=None,
                      help="""Total number of instructions to
                                            simulate (default: run forever)""")
    parser.add_option("--work-item-id",
                      action="store",
                      type="int",
                      help="the specific work id for exit & checkpointing")
    parser.add_option("--num-work-ids",
                      action="store",
                      type="int",
                      help="Number of distinct work item types")
    parser.add_option("--work-begin-cpu-id-exit",
                      action="store",
                      type="int",
                      help="exit when work starts on the specified cpu")
    parser.add_option("--work-end-exit-count",
                      action="store",
                      type="int",
                      help="exit at specified work end count")
    parser.add_option("--work-begin-exit-count",
                      action="store",
                      type="int",
                      help="exit at specified work begin count")
    parser.add_option("--init-param",
                      action="store",
                      type="int",
                      default=0,
                      help="""Parameter available in simulation with m5
                              initparam""")

    # Simpoint options
    parser.add_option("--simpoint-profile",
                      action="store_true",
                      help="Enable basic block profiling for SimPoints")
    parser.add_option("--simpoint-interval",
                      type="int",
                      default=10000000,
                      help="SimPoint interval in num of instructions")

    # Checkpointing options
    ###Note that performing checkpointing via python script files will override
    ###checkpoint instructions built into binaries.
    parser.add_option(
        "--take-checkpoints",
        action="store",
        type="string",
        help="<M,N> take checkpoints at tick M and every N ticks thereafter")
    parser.add_option("--max-checkpoints",
                      action="store",
                      type="int",
                      help="the maximum number of checkpoints to drop",
                      default=5)
    parser.add_option("--checkpoint-dir",
                      action="store",
                      type="string",
                      help="Place all checkpoints in this absolute directory")
    parser.add_option("-r",
                      "--checkpoint-restore",
                      action="store",
                      type="int",
                      help="restore from checkpoint <N>")
    parser.add_option("--checkpoint-at-end",
                      action="store_true",
                      help="take a checkpoint at end of run")
    parser.add_option("--work-begin-checkpoint-count",
                      action="store",
                      type="int",
                      help="checkpoint at specified work begin count")
    parser.add_option("--work-end-checkpoint-count",
                      action="store",
                      type="int",
                      help="checkpoint at specified work end count")
    parser.add_option(
        "--work-cpus-checkpoint-count",
        action="store",
        type="int",
        help="checkpoint and exit when active cpu count is reached")
    parser.add_option("--restore-with-cpu",
                      action="store",
                      type="choice",
                      default="atomic",
                      choices=CpuConfig.cpu_names(),
                      help="cpu type for restoring from a checkpoint")

    # CPU Switching - default switch model goes from a checkpoint
    # to a timing simple CPU with caches to warm up, then to detailed CPU for
    # data measurement
    parser.add_option(
        "--repeat-switch",
        action="store",
        type="int",
        default=None,
        help="switch back and forth between CPUs with period <N>")
    parser.add_option(
        "-s",
        "--standard-switch",
        action="store",
        type="int",
        default=None,
        help="switch from timing to Detailed CPU after warmup period of <N>")
    parser.add_option("-p",
                      "--prog-interval",
                      # BUG FIX: optparse only accepts the type name
                      # "string"; type="str" raises OptionError here.
                      type="string",
                      help="CPU Progress Interval")

    # Fastforwarding and simpoint related materials
    parser.add_option(
        "-W",
        "--warmup-insts",
        action="store",
        type="int",
        default=None,
        help="Warmup period in total instructions (requires --standard-switch)"
    )
    parser.add_option(
        "--bench",
        action="store",
        type="string",
        default=None,
        help="base names for --take-checkpoint and --checkpoint-restore")
    parser.add_option(
        "-F",
        "--fast-forward",
        action="store",
        type="string",
        default=None,
        help="Number of instructions to fast forward before switching")
    parser.add_option(
        "-S",
        "--simpoint",
        action="store_true",
        default=False,
        help="""Use workload simpoints as an instruction offset for
                --checkpoint-restore or --take-checkpoint.""")
    parser.add_option(
        "--at-instruction",
        action="store_true",
        default=False,
        help="""Treat value of --checkpoint-restore or --take-checkpoint as a
                number of instructions.""")
    parser.add_option(
        "--spec-input",
        default="ref",
        type="choice",
        choices=["ref", "test", "train", "smred", "mdred", "lgred"],
        help="Input set size for SPEC CPU2000 benchmarks.")
    parser.add_option("--arm-iset",
                      default="arm",
                      type="choice",
                      choices=["arm", "thumb", "aarch64"],
                      help="ARM instruction set.")
Beispiel #39
0
def addCommonOptions(parser):
    """Register the command-line options shared by the simulation scripts.

    Adds system, memory, synthesis, cache, Ruby, run-duration, SimPoint,
    checkpointing, CPU-switching and fast-forwarding options to *parser*
    (an optparse.OptionParser).  The parser is mutated in place; nothing
    is returned.
    """
    # system options
    parser.add_option("--list-cpu-types",
                      action="callback", callback=_listCpuTypes,
                      help="List available CPU types")
    parser.add_option("--cpu-type", type="choice", default="atomic",
                      choices=CpuConfig.cpu_names(),
                      help="type of cpu to run with")
    parser.add_option("--checker", action="store_true")
    parser.add_option("-n", "--num-cpus", type="int", default=1)
    parser.add_option("--sys-clock", action="store", type="string",
                      default='1GHz',
                      help="""Top-level clock for blocks running at system
                      speed""")
    parser.add_option("--cpu-clock", action="store", type="string",
                      default='2GHz',
                      help="Clock for blocks running at CPU speed")
    parser.add_option("--smt", action="store_true", default=False,
                      help="""
                      Only used if multiple programs are specified. If true,
                      then the number of threads per cpu is same as the
                      number of programs.""")

    # Memory Options
    parser.add_option("--list-mem-types",
                      action="callback", callback=_listMemTypes,
                      help="List available memory types")
    parser.add_option("--mem-type", type="choice", default="simple_mem",
                      choices=MemConfig.mem_names(),
                      help="type of memory to use")
    parser.add_option("--mem-size", action="store", type="string",
                      default="512MB",
                      help="Specify the physical memory size (single memory)")

    # Synthesis options
    parser.add_option("--syscall-dump", action="store_true",
                      help="Dump the syscall.")
    parser.add_option("--synthesize", action="store_true",
                      help="Generate synthesis code for a benchmark/interval.")
    parser.add_option("--synthesize-start", type="int", default=1,
                      help="synthesis start instruction")
    parser.add_option("--synthesize-interval", type="int", default=10000000,
                      help="synthesis window size")

    # Cache Options
    parser.add_option("--caches", action="store_true")
    parser.add_option("--l2cache", action="store_true")
    parser.add_option("--fastmem", action="store_true")
    parser.add_option("--num-dirs", type="int", default=1)
    parser.add_option("--num-l2caches", type="int", default=1)
    parser.add_option("--num-l3caches", type="int", default=1)
    parser.add_option("--l1d_size", type="string", default="64kB")
    parser.add_option("--l1i_size", type="string", default="32kB")
    parser.add_option("--l2_size", type="string", default="2MB")
    parser.add_option("--l3_size", type="string", default="16MB")
    parser.add_option("--l1d_assoc", type="int", default=2)
    parser.add_option("--l1i_assoc", type="int", default=2)
    parser.add_option("--l2_assoc", type="int", default=8)
    parser.add_option("--l3_assoc", type="int", default=16)
    parser.add_option("--cacheline_size", type="int", default=64)

    # Enable Ruby
    parser.add_option("--ruby", action="store_true")

    # Run duration options
    parser.add_option("-m", "--abs-max-tick", type="int", default=None,
                      metavar="TICKS", help="Run to absolute simulated tick "
                      "specified including ticks from a restored checkpoint")
    parser.add_option("--rel-max-tick", type="int", default=None,
                      metavar="TICKS", help="Simulate for specified number of"
                      " ticks relative to the simulation start tick (e.g. if "
                      "restoring a checkpoint)")
    parser.add_option("--maxtime", type="float", default=None,
                      help="Run to the specified absolute simulated time in "
                      "seconds")
    parser.add_option("-I", "--maxinsts", action="store", type="int",
                      default=None, help="""Total number of instructions to
                                            simulate (default: run forever)""")
    parser.add_option("--work-item-id", action="store", type="int",
                      help="the specific work id for exit & checkpointing")
    parser.add_option("--work-begin-cpu-id-exit", action="store", type="int",
                      help="exit when work starts on the specified cpu")
    parser.add_option("--work-end-exit-count", action="store", type="int",
                      help="exit at specified work end count")
    parser.add_option("--work-begin-exit-count", action="store", type="int",
                      help="exit at specified work begin count")
    parser.add_option("--init-param", action="store", type="int", default=0,
                      help="""Parameter available in simulation with m5
                              initparam""")

    # Simpoint options
    parser.add_option("--simpoint-profile", action="store_true",
                      help="Enable basic block profiling for SimPoints")
    parser.add_option("--simpoint-interval", type="int", default=10000000,
                      help="SimPoint interval in num of instructions")

    # Checkpointing options
    # Note that performing checkpointing via python script files will
    # override checkpoint instructions built into binaries.
    parser.add_option("--take-checkpoints", action="store", type="string",
        help="<M,N> take checkpoints at tick M and every N ticks thereafter")
    parser.add_option("--max-checkpoints", action="store", type="int",
        help="the maximum number of checkpoints to drop", default=5)
    parser.add_option("--checkpoint-dir", action="store", type="string",
        help="Place all checkpoints in this absolute directory")
    parser.add_option("-r", "--checkpoint-restore", action="store", type="int",
        help="restore from checkpoint <N>")
    parser.add_option("--checkpoint-at-end", action="store_true",
                      help="take a checkpoint at end of run")
    parser.add_option("--work-begin-checkpoint-count", action="store",
                      type="int",
                      help="checkpoint at specified work begin count")
    parser.add_option("--work-end-checkpoint-count", action="store",
                      type="int",
                      help="checkpoint at specified work end count")
    parser.add_option("--work-cpus-checkpoint-count", action="store",
                      type="int",
                      help="checkpoint and exit when active cpu count is "
                      "reached")
    parser.add_option("--restore-with-cpu", action="store", type="choice",
                      default="atomic", choices=CpuConfig.cpu_names(),
                      help="cpu type for restoring from a checkpoint")

    # CPU Switching - default switch model goes from a checkpoint to a
    # timing simple CPU with caches to warm up, then to detailed CPU for
    # data measurement
    parser.add_option("--repeat-switch", action="store", type="int",
        default=None,
        help="switch back and forth between CPUs with period <N>")
    parser.add_option("-s", "--standard-switch", action="store", type="int",
        default=None,
        help="switch from timing to Detailed CPU after warmup period of <N>")
    parser.add_option("-p", "--prog-interval", type="str",
        help="CPU Progress Interval")

    # Fastforwarding and simpoint related materials
    parser.add_option("-W", "--warmup-insts", action="store", type="int",
        default=None,
        help="Warmup period in total instructions (requires --standard-switch)")
    parser.add_option("--bench", action="store", type="string", default=None,
        help="base names for --take-checkpoint and --checkpoint-restore")
    parser.add_option("-F", "--fast-forward", action="store", type="string",
        default=None,
        help="Number of instructions to fast forward before switching")
    parser.add_option("-S", "--simpoint", action="store_true", default=False,
        help="""Use workload simpoints as an instruction offset for
                --checkpoint-restore or --take-checkpoint.""")
    parser.add_option("--at-instruction", action="store_true", default=False,
        help="""Treat value of --checkpoint-restore or --take-checkpoint as a
                number of instructions.""")
Beispiel #40
0
def setMemClass(options):
    """Look up and return the memory controller class selected by
    the --mem-type command-line option."""
    mem_type_name = options.mem_type
    return MemConfig.get(mem_type_name)
Beispiel #41
0
def setup_memory_controllers(system, ruby, dir_cntrls, options):
    """Create memory controllers (and, optionally, one DRAM cache per
    directory) and wire them to the given Ruby directory controllers.

    One memory controller is created per (directory controller, address
    range) pair; the full list is installed as ``system.mem_ctrls``.
    Mutates ``system``, ``ruby`` and each element of ``dir_cntrls``;
    returns None.
    """
    ruby.block_size_bytes = options.cacheline_size
    # NOTE(review): hard-codes a 48-bit physical address space -- confirm
    # this matches the target ISA's address width.
    ruby.memory_size_bits = 48
    # Number of block-offset bits (assumes cacheline_size is a power of 2).
    block_size_bits = int(math.log(options.cacheline_size, 2))

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        dir_bits = int(math.log(options.num_dirs, 2))
        numa_bit = block_size_bits + dir_bits - 1

    index = 0
    mem_ctrls = []
    crossbars = []

    # Sets bits to be used for interleaving.  Creates memory controllers
    # attached to a directory controller.  A separate controller is created
    # for each address range as the abstract memory can handle only one
    # contiguous address range as of now.
    for dir_cntrl in dir_cntrls:
        # Create 1 instance of DRAMCache per directory controller
        if options.dramcache:
            dramcache_ctrl = MemConfig.create_dramcache_ctrl(
                MemConfig.get_cache(options.dramcache_type),
                system.mem_ranges[0], index, options.num_dirs,
                options.dramcache_size, options.dramcache_assoc,
                options.dramcache_block_size, options.num_cpus,
                options.dramcache_timing)

            mem_ctrls.append(dramcache_ctrl)
            # The directory talks to memory through the DRAM cache.
            dir_cntrl.memory = dramcache_ctrl.port

        dir_cntrl.directory.numa_high_bit = numa_bit

        crossbar = None
        if len(system.mem_ranges) > 1:
            # we dont support this
            # NOTE(review): fatal() aborts here, so the crossbar wiring
            # below this line is dead code kept only for reference.
            fatal("system mem_ranges greater than 1")
            crossbar = IOXBar()
            crossbars.append(crossbar)
            if options.dramcache:
                dramcache_ctrl.dramcache_masterport = crossbar.slave
            else:
                dir_cntrl.memory = crossbar.slave

        for r in system.mem_ranges:
            # if dramcache exists interleave at dramcache_block_size
            if options.dramcache:
                mem_ctrl = MemConfig.create_mem_ctrl(
                    MemConfig.get(options.mem_type),
                    r, index, options.num_dirs,
                    int(math.log(options.num_dirs,
                                 2)), options.dramcache_block_size)
            else:
                mem_ctrl = MemConfig.create_mem_ctrl(
                    MemConfig.get(options.mem_type),
                    r, index, options.num_dirs,
                    int(math.log(options.num_dirs, 2)), options.cacheline_size)

            mem_ctrls.append(mem_ctrl)

            # Connect the memory controller either behind the crossbar
            # (unreachable -- see fatal above), behind the DRAM cache, or
            # directly to the directory's memory port.
            if crossbar != None:
                mem_ctrl.port = crossbar.master
            else:
                if options.dramcache:
                    mem_ctrl.port = dramcache_ctrl.dramcache_masterport
                else:
                    mem_ctrl.port = dir_cntrl.memory

        index += 1

    system.mem_ctrls = mem_ctrls

    if len(crossbars) > 0:
        ruby.crossbars = crossbars
Beispiel #42
0
def _listMemTypes(option, opt, value, parser):
    """optparse callback: print the available memory types and exit 0."""
    MemConfig.print_mem_list()
    raise SystemExit(0)
Beispiel #43
0
def addCommonOptions(parser):
    """Register the command-line options shared by the simulation scripts.

    Adds system, memory, cache (including the PRODROMOU additions),
    Ruby, run-duration, SimPoint, checkpointing, CPU-switching and
    fast-forwarding options to *parser* (an optparse.OptionParser).
    The parser is mutated in place; nothing is returned.
    """
    # system options
    parser.add_option("--list-cpu-types",
                      action="callback", callback=_listCpuTypes,
                      help="List available CPU types")
    parser.add_option("--cpu-type", type="choice", default="atomic",
                      choices=CpuConfig.cpu_names(),
                      help="type of cpu to run with")
    parser.add_option("--checker", action="store_true")
    parser.add_option("-n", "--num-cpus", type="int", default=1)
    parser.add_option("--sys-voltage", action="store", type="string",
                      default='1.0V',
                      help="""Top-level voltage for blocks running at system
                      power supply""")
    parser.add_option("--sys-clock", action="store", type="string",
                      default='1GHz',
                      help="""Top-level clock for blocks running at system
                      speed""")
    parser.add_option("--cpu-clock", action="store", type="string",
                      default='1GHz',
                      help="Clock for blocks running at CPU speed")
    parser.add_option("--smt", action="store_true", default=False,
                      help="""
                      Only used if multiple programs are specified. If true,
                      then the number of threads per cpu is same as the
                      number of programs.""")

    # Memory Options
    parser.add_option("--list-mem-types",
                      action="callback", callback=_listMemTypes,
                      help="List available memory types")
    parser.add_option("--mem-type", type="choice", default="simple_mem",
                      choices=MemConfig.mem_names(),
                      help="type of memory to use")
    parser.add_option("--mem-channels", type="int", default=1,
                      help="number of memory channels")
    parser.add_option("--mem-size", action="store", type="string",
                      default="4GB",
                      help="Specify the physical memory size (single memory)")

    # Cache Options
    parser.add_option("--caches", action="store_true")
    parser.add_option("--l2cache", action="store_true")
    # PRODROMOU: extra cache / benchmark / memory-controller options
    parser.add_option("--l3cache", action="store_true",
                      help="Enable L3 cache (Implies L2)")
    parser.add_option("-b", "--benchmark", default="",
                      help="The benchmark to be loaded.")
    parser.add_option("--bench-size", default="ref",
                      help="The size of the benchmark <train/ref>")
    parser.add_option("--total-insts", type="int",
                      default=0,  # 0 so "if options.total_insts" is false
                      help="If defined, the simulation is going to keep "
                           "running until the total number of instructions "
                           "has been executed across all threads")
    parser.add_option("--mempolicy", default="frfcfs",
                      help="The memory controller scheduling policy to be "
                           "used")
    parser.add_option("--ckpt-nickname", default=None, type="string",
                      help="If defined, the simulator will use it as part of "
                           "the checkpoint's name. Example (nickname set as "
                           "memIntense): cpt.memIntense.20140693 instead of "
                           "cpt.None.20140693")
    parser.add_option("--mutlu", action="store_true",
                      help="Creates the mem hierarchy used in Mutlu's Par-BS "
                           "paper")
    parser.add_option("-d", "--dump-interval", default=0, type="int",
                      help="Dumps statistics every defined interval")
    parser.add_option("--per-access-slowdown", default="0ns", type="string",
                      help="Sets the MC's static delay per access. Only used "
                           "custom_tcl MC class")
    parser.add_option("--slowdown-accesses", default=False,
                      action="store_true",
                      help="Enables per access slowdown. Amount of delay "
                           "passed with --per-access-slowdown")
    # PRODROMOU: end of additions

    parser.add_option("--fastmem", action="store_true")
    parser.add_option("--num-dirs", type="int", default=1)
    parser.add_option("--num-l2caches", type="int", default=1)
    parser.add_option("--num-l3caches", type="int", default=1)
    parser.add_option("--l1d_size", type="string", default="32kB")
    parser.add_option("--l1i_size", type="string", default="32kB")
    parser.add_option("--l2_size", type="string", default="512kB")
    parser.add_option("--l3_size", type="string", default="16MB")
    parser.add_option("--l1d_assoc", type="int", default=2)
    parser.add_option("--l1i_assoc", type="int", default=2)
    parser.add_option("--l2_assoc", type="int", default=8)
    parser.add_option("--l3_assoc", type="int", default=16)
    parser.add_option("--cacheline_size", type="int", default=64)

    # Enable Ruby
    parser.add_option("--ruby", action="store_true")

    # Run duration options
    parser.add_option("-m", "--abs-max-tick", type="int", default=None,
                      metavar="TICKS", help="Run to absolute simulated tick "
                      "specified including ticks from a restored checkpoint")
    parser.add_option("--rel-max-tick", type="int", default=None,
                      metavar="TICKS", help="Simulate for specified number of"
                      " ticks relative to the simulation start tick (e.g. if "
                      "restoring a checkpoint)")
    parser.add_option("--maxtime", type="float", default=None,
                      help="Run to the specified absolute simulated time in "
                      "seconds")
    parser.add_option("-I", "--maxinsts", action="store", type="int",
                      default=None, help="""Total number of instructions to
                                            simulate (default: run forever)""")
    parser.add_option("--work-item-id", action="store", type="int",
                      help="the specific work id for exit & checkpointing")
    parser.add_option("--work-begin-cpu-id-exit", action="store", type="int",
                      help="exit when work starts on the specified cpu")
    parser.add_option("--work-end-exit-count", action="store", type="int",
                      help="exit at specified work end count")
    parser.add_option("--work-begin-exit-count", action="store", type="int",
                      help="exit at specified work begin count")
    parser.add_option("--init-param", action="store", type="int", default=0,
                      help="""Parameter available in simulation with m5
                              initparam""")

    # Simpoint options
    parser.add_option("--simpoint-profile", action="store_true",
                      help="Enable basic block profiling for SimPoints")
    parser.add_option("--simpoint-interval", type="int", default=10000000,
                      help="SimPoint interval in num of instructions")

    # Checkpointing options
    # Note that performing checkpointing via python script files will
    # override checkpoint instructions built into binaries.
    parser.add_option("--take-checkpoints", action="store", type="string",
        help="<M,N> take checkpoints at tick M and every N ticks thereafter")
    parser.add_option("--max-checkpoints", action="store", type="int",
        help="the maximum number of checkpoints to drop", default=5)
    parser.add_option("--checkpoint-dir", action="store", type="string",
        help="Place all checkpoints in this absolute directory")
    parser.add_option("-r", "--checkpoint-restore", action="store", type="int",
        help="restore from checkpoint <N>")
    parser.add_option("--checkpoint-at-end", action="store_true",
                      help="take a checkpoint at end of run")
    parser.add_option("--work-begin-checkpoint-count", action="store",
                      type="int",
                      help="checkpoint at specified work begin count")
    parser.add_option("--work-end-checkpoint-count", action="store",
                      type="int",
                      help="checkpoint at specified work end count")
    parser.add_option("--work-cpus-checkpoint-count", action="store",
                      type="int",
                      help="checkpoint and exit when active cpu count is "
                      "reached")
    parser.add_option("--restore-with-cpu", action="store", type="choice",
                      default="atomic", choices=CpuConfig.cpu_names(),
                      help="cpu type for restoring from a checkpoint")

    # CPU Switching - default switch model goes from a checkpoint to a
    # timing simple CPU with caches to warm up, then to detailed CPU for
    # data measurement
    parser.add_option("--repeat-switch", action="store", type="int",
        default=None,
        help="switch back and forth between CPUs with period <N>")
    parser.add_option("-s", "--standard-switch", action="store", type="int",
        default=None,
        help="switch from timing to Detailed CPU after warmup period of <N>")
    parser.add_option("-p", "--prog-interval", type="str",
        help="CPU Progress Interval")

    # Fastforwarding and simpoint related materials
    parser.add_option("-W", "--warmup-insts", action="store", type="int",
        default=None,
        help="Warmup period in total instructions (requires --standard-switch)")
    parser.add_option("--bench", action="store", type="string", default=None,
        help="base names for --take-checkpoint and --checkpoint-restore")
    parser.add_option("-F", "--fast-forward", action="store", type="string",
        default=None,
        help="Number of instructions to fast forward before switching")
    parser.add_option("-S", "--simpoint", action="store_true", default=False,
        help="""Use workload simpoints as an instruction offset for
                --checkpoint-restore or --take-checkpoint.""")
    parser.add_option("--at-instruction", action="store_true", default=False,
        help="""Treat value of --checkpoint-restore or --take-checkpoint as a
                number of instructions.""")
Beispiel #44
0
            import google.protobuf
        except:
            print "Please install the Python protobuf module"
            exit(-1)

        import packet_pb2
    else:
        print "Failed to import packet proto definitions"
        exit(-1)

# Top-level script setup: build the option parser for the trace tool.
parser = optparse.OptionParser()

# Memory model to simulate against the trace.
parser.add_option("--mem-type",
                  type="choice",
                  default="DDR3_1600_8x8",
                  choices=MemConfig.mem_names(),
                  help="type of memory to use")
parser.add_option("--mem-size",
                  action="store",
                  type="string",
                  default="16MB",
                  help="Specify the memory size")
# When set, skip trace generation and replay previously generated traces.
parser.add_option("--reuse-trace",
                  action="store_true",
                  help="Prevent generation of traces and reuse existing")

(options, args) = parser.parse_args()

# This script takes options only; any positional argument is an error.
if args:
    print "Error: script doesn't take any positional arguments"
    sys.exit(1)
Beispiel #45
0
def addNoISAOptions(parser):
    parser.add_option("-n", "--num-cpus", type="int", default=1)
    parser.add_option("--sys-voltage", action="store", type="string",
                      default='1.0V',
                      help = """Top-level voltage for blocks running at system
                      power supply""")
    parser.add_option("--sys-clock", action="store", type="string",
                      default='1GHz',
                      help = """Top-level clock for blocks running at system
                      speed""")

    # Memory Options
    parser.add_option("--list-mem-types",
                      action="callback", callback=_listMemTypes,
                      help="List available memory types")
    parser.add_option("--mem-type", type="choice", default="DDR3_1600_x64",
                      choices=MemConfig.mem_names(),
                      help = "type of memory to use")
    parser.add_option("--mem-channels", type="int", default=1,
                      help = "number of memory channels")
    parser.add_option("--mem-ranks", type="int", default=None,
                      help = "number of memory ranks per channel")
    parser.add_option("--mem-size", action="store", type="string",
                      default="512MB",
                      help="Specify the physical memory size (single memory)")


    parser.add_option("--memchecker", action="store_true")

    # Cache Options
    parser.add_option("--external-memory-system", type="string",
                      help="use external ports of this port_type for caches")
    parser.add_option("--tlm-memory", type="string",
                      help="use external port for SystemC TLM cosimulation")
    parser.add_option("--caches", action="store_true")
    parser.add_option("--l2cache", action="store_true")
    parser.add_option("--num-dirs", type="int", default=1)
    parser.add_option("--num-l2caches", type="int", default=1)
    parser.add_option("--num-l3caches", type="int", default=1)
    parser.add_option("--l1d_size", type="string", default="64kB")
    parser.add_option("--l1i_size", type="string", default="32kB")
    parser.add_option("--l2_size", type="string", default="2MB")
    parser.add_option("--l3_size", type="string", default="16MB")
    parser.add_option("--l1d_assoc", type="int", default=2)
    parser.add_option("--l1i_assoc", type="int", default=2)
    parser.add_option("--l2_assoc", type="int", default=8)
    parser.add_option("--l3_assoc", type="int", default=16)
    parser.add_option("--cacheline_size", type="int", default=64)

    # Enable Ruby
    parser.add_option("--ruby", action="store_true")

    # Run duration options
    parser.add_option("-m", "--abs-max-tick", type="int", default=m5.MaxTick,
                      metavar="TICKS", help="Run to absolute simulated tick "
                      "specified including ticks from a restored checkpoint")
    parser.add_option("--rel-max-tick", type="int", default=None,
                      metavar="TICKS", help="Simulate for specified number of"
                      " ticks relative to the simulation start tick (e.g. if "
                      "restoring a checkpoint)")
    parser.add_option("--maxtime", type="float", default=None,
                      help="Run to the specified absolute simulated time in "
                      "seconds")