Esempio n. 1
0
 def registerTopology(self, options):
     i = 0
     for n in numa_nodes:
         if n:
             FileSystemConfig.register_node(n,
                 MemorySize(options.mem_size) / num_numa_nodes, i)
         i += 1
Esempio n. 2
0
    def makeTopology(self, options, full_system, network, IntLink, ExtLink,
                     Router):
        nodes = self.nodes

        num_routers = options.num_cpus
        num_rows = options.mesh_rows

        # default values for link latency and router latency.
        # Can be over-ridden on a per link/router basis
        link_latency = options.link_latency  # used by simple and garnet
        router_latency = options.router_latency  # only used by garnet

        # First determine which nodes are cache cntrls vs. dirs vs. dma
        cache_nodes = []
        dir_nodes = []
        dma_nodes = []
        for node in nodes:
            if node.type == 'L1Cache_Controller' or \
            node.type == 'L2Cache_Controller':
                cache_nodes.append(node)
            elif node.type == 'Directory_Controller':
                dir_nodes.append(node)
            elif node.type == 'DMA_Controller':
                dma_nodes.append(node)

        # Obviously the number or rows must be <= the number of routers
        # and evenly divisible.  Also the number of caches must be a
        # multiple of the number of routers and the number of directories
        # must be four.
        assert (num_rows > 0 and num_rows <= num_routers)
        num_columns = int(num_routers / num_rows)
        assert (num_columns * num_rows == num_routers)
        caches_per_router, remainder = divmod(len(cache_nodes), num_routers)
        assert (remainder == 0)
        assert (len(dir_nodes) == 4)

        # Create the routers in the mesh
        routers = [Router(router_id=i, latency = router_latency) \
            for i in range(num_routers)]
        network.routers = routers

        # link counter to set unique link ids
        link_count = 0

        # Connect each cache controller to the appropriate router
        ext_links = []
        for (i, n) in enumerate(cache_nodes):
            cntrl_level, router_id = divmod(i, num_routers)
            assert (cntrl_level < caches_per_router)
            ext_links.append(
                ExtLink(link_id=link_count,
                        ext_node=n,
                        int_node=routers[router_id],
                        latency=link_latency))
            link_count += 1

        # NUMA Node for each quadrant
        # With odd columns or rows, the nodes will be unequal
        numa_nodes = [[], [], [], []]
        for i in xrange(num_routers):
            if i % num_columns < num_columns / 2  and \
               i < num_routers / 2:
                numa_nodes[0].append(i)
            elif i % num_columns >= num_columns / 2  and \
               i < num_routers / 2:
                numa_nodes[1].append(i)
            elif i % num_columns < num_columns / 2  and \
               i >= num_routers / 2:
                numa_nodes[2].append(i)
            else:
                numa_nodes[3].append(i)

        num_numa_nodes = 0
        for n in numa_nodes:
            if n:
                num_numa_nodes += 1

        # Register nodes with filesystem
        if not full_system:
            for n in numa_nodes:
                if n:
                    FileSystemConfig.register_node(
                        n,
                        MemorySize(options.mem_size) / num_numa_nodes)

        # Connect the dir nodes to the corners.
        ext_links.append(
            ExtLink(link_id=link_count,
                    ext_node=dir_nodes[0],
                    int_node=routers[0],
                    latency=link_latency))
        link_count += 1
        ext_links.append(
            ExtLink(link_id=link_count,
                    ext_node=dir_nodes[1],
                    int_node=routers[num_columns - 1],
                    latency=link_latency))
        link_count += 1
        ext_links.append(
            ExtLink(link_id=link_count,
                    ext_node=dir_nodes[2],
                    int_node=routers[num_routers - num_columns],
                    latency=link_latency))
        link_count += 1
        ext_links.append(
            ExtLink(link_id=link_count,
                    ext_node=dir_nodes[3],
                    int_node=routers[num_routers - 1],
                    latency=link_latency))
        link_count += 1

        # Connect the dma nodes to router 0.  These should only be DMA nodes.
        for (i, node) in enumerate(dma_nodes):
            assert (node.type == 'DMA_Controller')
            ext_links.append(
                ExtLink(link_id=link_count,
                        ext_node=node,
                        int_node=routers[0],
                        latency=link_latency))

        network.ext_links = ext_links

        # Create the mesh links.
        int_links = []

        # East output to West input links (weight = 1)
        for row in xrange(num_rows):
            for col in xrange(num_columns):
                if (col + 1 < num_columns):
                    east_out = col + (row * num_columns)
                    west_in = (col + 1) + (row * num_columns)
                    int_links.append(
                        IntLink(link_id=link_count,
                                src_node=routers[east_out],
                                dst_node=routers[west_in],
                                src_outport="East",
                                dst_inport="West",
                                latency=link_latency,
                                weight=1))
                    link_count += 1

        # West output to East input links (weight = 1)
        for row in xrange(num_rows):
            for col in xrange(num_columns):
                if (col + 1 < num_columns):
                    east_in = col + (row * num_columns)
                    west_out = (col + 1) + (row * num_columns)
                    int_links.append(
                        IntLink(link_id=link_count,
                                src_node=routers[west_out],
                                dst_node=routers[east_in],
                                src_outport="West",
                                dst_inport="East",
                                latency=link_latency,
                                weight=1))
                    link_count += 1

        # North output to South input links (weight = 2)
        for col in xrange(num_columns):
            for row in xrange(num_rows):
                if (row + 1 < num_rows):
                    north_out = col + (row * num_columns)
                    south_in = col + ((row + 1) * num_columns)
                    int_links.append(
                        IntLink(link_id=link_count,
                                src_node=routers[north_out],
                                dst_node=routers[south_in],
                                src_outport="North",
                                dst_inport="South",
                                latency=link_latency,
                                weight=2))
                    link_count += 1

        # South output to North input links (weight = 2)
        for col in xrange(num_columns):
            for row in xrange(num_rows):
                if (row + 1 < num_rows):
                    north_in = col + (row * num_columns)
                    south_out = col + ((row + 1) * num_columns)
                    int_links.append(
                        IntLink(link_id=link_count,
                                src_node=routers[south_out],
                                dst_node=routers[north_in],
                                src_outport="South",
                                dst_inport="North",
                                latency=link_latency,
                                weight=2))
                    link_count += 1

        network.int_links = int_links
Esempio n. 3
0
 def registerTopology(self, options):
     for i in range(options.num_cpus):
         FileSystemConfig.register_node([i],
                                        MemorySize(options.mem_size) //
                                        options.num_cpus, i)
Esempio n. 4
0
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):

    if buildEnv['PROTOCOL'] != 'MESI_Three_Level':
        fatal("This script requires the MESI_Three_Level protocol to be\
               built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l0_cntrl_nodes = []
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dma_cntrl_nodes = []

    assert (options.num_cpus % options.num_clusters == 0)
    num_cpus_per_cluster = options.num_cpus // options.num_clusters

    assert (options.num_l2caches % options.num_clusters == 0)
    num_l2caches_per_cluster = options.num_l2caches // options.num_clusters

    l2_bits = int(math.log(num_l2caches_per_cluster, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))
    l2_index_start = block_size_bits + l2_bits

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    for i in range(options.num_clusters):
        for j in range(num_cpus_per_cluster):
            #
            # First create the Ruby objects associated with this cpu
            #
            l0i_cache = L0Cache(size = options.l0i_size,
                assoc = options.l0i_assoc,
                is_icache = True,
                start_index_bit = block_size_bits,
                replacement_policy = LRURP())

            l0d_cache = L0Cache(size = options.l0d_size,
                assoc = options.l0d_assoc,
                is_icache = False,
                start_index_bit = block_size_bits,
                replacement_policy = LRURP())

            # the ruby random tester reuses num_cpus to specify the
            # number of cpu ports connected to the tester object, which
            # is stored in system.cpu. because there is only ever one
            # tester object, num_cpus is not necessarily equal to the
            # size of system.cpu; therefore if len(system.cpu) == 1
            # we use system.cpu[0] to set the clk_domain, thereby ensuring
            # we don't index off the end of the cpu list.
            if len(system.cpu) == 1:
                clk_domain = system.cpu[0].clk_domain
            else:
                clk_domain = system.cpu[i].clk_domain

            # Ruby prefetcher
            prefetcher = RubyPrefetcher(
                num_streams=16,
                unit_filter = 256,
                nonunit_filter = 256,
                train_misses = 5,
                num_startup_pfs = 4,
                cross_page = True
            )

            l0_cntrl = L0Cache_Controller(
                   version = i * num_cpus_per_cluster + j,
                   Icache = l0i_cache, Dcache = l0d_cache,
                   transitions_per_cycle = options.l0_transitions_per_cycle,
                   prefetcher = prefetcher,
                   enable_prefetch = options.enable_prefetch,
                   send_evictions = send_evicts(options),
                   clk_domain = clk_domain,
                   ruby_system = ruby_system)

            cpu_seq = RubySequencer(version = i * num_cpus_per_cluster + j,
                                    icache = l0i_cache,
                                    clk_domain = clk_domain,
                                    dcache = l0d_cache,
                                    ruby_system = ruby_system)

            l0_cntrl.sequencer = cpu_seq

            l1_cache = L1Cache(size = options.l1d_size,
                               assoc = options.l1d_assoc,
                               start_index_bit = block_size_bits,
                               is_icache = False)

            l1_cntrl = L1Cache_Controller(
                    version = i * num_cpus_per_cluster + j,
                    cache = l1_cache, l2_select_num_bits = l2_bits,
                    cluster_id = i,
                    transitions_per_cycle = options.l1_transitions_per_cycle,
                    ruby_system = ruby_system)

            exec("ruby_system.l0_cntrl%d = l0_cntrl"
                 % ( i * num_cpus_per_cluster + j))
            exec("ruby_system.l1_cntrl%d = l1_cntrl"
                 % ( i * num_cpus_per_cluster + j))

            #
            # Add controllers and sequencers to the appropriate lists
            #
            cpu_sequencers.append(cpu_seq)
            l0_cntrl_nodes.append(l0_cntrl)
            l1_cntrl_nodes.append(l1_cntrl)

            # Connect the L0 and L1 controllers
            l0_cntrl.prefetchQueue = MessageBuffer()
            l0_cntrl.mandatoryQueue = MessageBuffer()
            l0_cntrl.bufferToL1 = MessageBuffer(ordered = True)
            l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1
            l0_cntrl.bufferFromL1 = MessageBuffer(ordered = True)
            l1_cntrl.bufferToL0 = l0_cntrl.bufferFromL1

            # Connect the L1 controllers and the network
            l1_cntrl.requestToL2 = MessageBuffer()
            l1_cntrl.requestToL2.master = ruby_system.network.slave
            l1_cntrl.responseToL2 = MessageBuffer()
            l1_cntrl.responseToL2.master = ruby_system.network.slave
            l1_cntrl.unblockToL2 = MessageBuffer()
            l1_cntrl.unblockToL2.master = ruby_system.network.slave

            l1_cntrl.requestFromL2 = MessageBuffer()
            l1_cntrl.requestFromL2.slave = ruby_system.network.master
            l1_cntrl.responseFromL2 = MessageBuffer()
            l1_cntrl.responseFromL2.slave = ruby_system.network.master


        for j in range(num_l2caches_per_cluster):
            l2_cache = L2Cache(size = options.l2_size,
                               assoc = options.l2_assoc,
                               start_index_bit = l2_index_start)

            l2_cntrl = L2Cache_Controller(
                        version = i * num_l2caches_per_cluster + j,
                        L2cache = l2_cache, cluster_id = i,
                        transitions_per_cycle =\
                         options.l2_transitions_per_cycle,
                        ruby_system = ruby_system)

            exec("ruby_system.l2_cntrl%d = l2_cntrl"
                 % (i * num_l2caches_per_cluster + j))
            l2_cntrl_nodes.append(l2_cntrl)

            # Connect the L2 controllers and the network
            l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
            l2_cntrl.DirRequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
            l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.responseFromL2Cache = MessageBuffer()
            l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

            l2_cntrl.unblockToL2Cache = MessageBuffer()
            l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master
            l2_cntrl.L1RequestToL2Cache = MessageBuffer()
            l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
            l2_cntrl.responseToL2Cache = MessageBuffer()
            l2_cntrl.responseToL2Cache.slave = ruby_system.network.master

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
            clk_domain = ruby_system.clk_domain, clk_divider = 3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version = i, ruby_system = ruby_system)

        dma_cntrl = DMA_Controller(version = i,
                                   dma_sequencer = dma_seq,
                                   transitions_per_cycle = options.ports,
                                   ruby_system = ruby_system)

        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
        exec("ruby_system.dma_cntrl%d.dma_sequencer.slave = dma_port" % i)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave

    all_cntrls = l0_cntrl_nodes + \
                 l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version = len(dma_ports),
                                       dma_sequencer = io_seq,
                                       ruby_system = ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered = True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]
    # Register configuration with filesystem
    else:
        for i in range(options.num_clusters):
            for j in range(num_cpus_per_cluster):
                FileSystemConfig.register_cpu(physical_package_id = 0,
                                              core_siblings = range(options.num_cpus),
                                              core_id = i*num_cpus_per_cluster+j,
                                              thread_siblings = [])

                FileSystemConfig.register_cache(level = 0,
                                                idu_type = 'Instruction',
                                                size = options.l0i_size,
                                                line_size =\
                                                 options.cacheline_size,
                                                assoc = 1,
                                                cpus = [i*num_cpus_per_cluster+j])
                FileSystemConfig.register_cache(level = 0,
                                                idu_type = 'Data',
                                                size = options.l0d_size,
                                                line_size =\
                                                 options.cacheline_size,
                                                assoc = 1,
                                                cpus = [i*num_cpus_per_cluster+j])

                FileSystemConfig.register_cache(level = 1,
                                                idu_type = 'Unified',
                                                size = options.l1d_size,
                                                line_size = options.cacheline_size,
                                                assoc = options.l1d_assoc,
                                                cpus = [i*num_cpus_per_cluster+j])

            FileSystemConfig.register_cache(level = 2,
                                            idu_type = 'Unified',
                                            size = str(MemorySize(options.l2_size) * \
                                                   num_l2caches_per_cluster)+'B',
                                            line_size = options.cacheline_size,
                                            assoc = options.l2_assoc,
                                            cpus = [n for n in range(i*num_cpus_per_cluster, \
                                                                     (i+1)*num_cpus_per_cluster)])

    ruby_system.network.number_of_virtual_networks = 3
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
Esempio n. 5
0
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    if buildEnv['PROTOCOL'] != 'GPU_VIPER':
        panic("This script requires the GPU_VIPER protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    cp_cntrl_nodes = []
    tcp_cntrl_nodes = []
    sqc_cntrl_nodes = []
    tcc_cntrl_nodes = []
    dir_cntrl_nodes = []
    l3_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #

    # For an odd number of CPUs, still create the right number of controllers
    TCC_bits = int(math.log(options.num_tccs, 2))

    # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
    # Clusters
    crossbar_bw = None
    mainCluster = None

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        dir_bits = int(math.log(options.num_dirs, 2))
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
        #Assuming a 2GHz clock
        crossbar_bw = 16 * options.num_compute_units * options.bw_scalor
        mainCluster = Cluster(intBW=crossbar_bw)
    else:
        mainCluster = Cluster(intBW=8)  # 16 GB/s
    for i in range(options.num_dirs):
        dir_ranges = []
        for r in system.mem_ranges:
            addr_range = m5.objects.AddrRange(r.start,
                                              size=r.size(),
                                              intlvHighBit=numa_bit,
                                              intlvBits=dir_bits,
                                              intlvMatch=i)
            dir_ranges.append(addr_range)

        dir_cntrl = DirCntrl(noTCCdir=True, TCC_select_num_bits=TCC_bits)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)
        dir_cntrl.number_of_TBEs = options.num_tbes
        dir_cntrl.useL3OnWT = options.use_L3_on_WT
        # the number_of_TBEs is inclusive of TBEs below

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered=True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

        dir_cntrl.requestFromDMA = MessageBuffer(ordered=True)
        dir_cntrl.requestFromDMA.slave = ruby_system.network.master

        dir_cntrl.responseToDMA = MessageBuffer()
        dir_cntrl.responseToDMA.master = ruby_system.network.slave

        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

        exec("ruby_system.dir_cntrl%d = dir_cntrl" % i)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    cpuCluster = None
    if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
        cpuCluster = Cluster(extBW=crossbar_bw, intBW=crossbar_bw)
    else:
        cpuCluster = Cluster(extBW=8, intBW=8)  # 16 GB/s
    for i in range((options.num_cpus + 1) // 2):

        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        exec("ruby_system.cp_cntrl%d = cp_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered=True)

        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        for i in range((options.num_cpus + 1) // 2):
            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = \
                                            range(options.num_cpus),
                                          core_id = i*2,
                                          thread_siblings = [])

            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = \
                                            range(options.num_cpus),
                                          core_id = i*2+1,
                                          thread_siblings = [])

            FileSystemConfig.register_cache(level=0,
                                            idu_type='Instruction',
                                            size=options.l1i_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1i_assoc,
                                            cpus=[i * 2, i * 2 + 1])

            FileSystemConfig.register_cache(level=0,
                                            idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i * 2])

            FileSystemConfig.register_cache(level=0,
                                            idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i * 2 + 1])

            FileSystemConfig.register_cache(level=1,
                                            idu_type='Unified',
                                            size=options.l2_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l2_assoc,
                                            cpus=[i * 2, i * 2 + 1])

        for i in range(options.num_dirs):
            FileSystemConfig.register_cache(
                level=2,
                idu_type='Unified',
                size=options.l3_size,
                line_size=options.cacheline_size,
                assoc=options.l3_assoc,
                cpus=[n for n in range(options.num_cpus)])

    gpuCluster = None
    if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
        gpuCluster = Cluster(extBW=crossbar_bw, intBW=crossbar_bw)
    else:
        gpuCluster = Cluster(extBW=8, intBW=8)  # 16 GB/s
    for i in range(options.num_compute_units):

        tcp_cntrl = TCPCntrl(TCC_select_num_bits=TCC_bits,
                             issue_latency=1,
                             number_of_TBEs=2560)
        # TBEs set to max outstanding requests
        tcp_cntrl.create(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = options.noL1
        tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
        tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency

        exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.coalescer)
        tcp_cntrl_nodes.append(tcp_cntrl)

        # Connect the TCP controller to the ruby network
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.requestFromTCP.master = ruby_system.network.slave

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseFromTCP.master = ruby_system.network.slave

        tcp_cntrl.unblockFromCore = MessageBuffer()
        tcp_cntrl.unblockFromCore.master = ruby_system.network.slave

        tcp_cntrl.probeToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.probeToTCP.slave = ruby_system.network.master

        tcp_cntrl.responseToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseToTCP.slave = ruby_system.network.master

        tcp_cntrl.mandatoryQueue = MessageBuffer()

        gpuCluster.add(tcp_cntrl)

    for i in range(options.num_sqc):

        sqc_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # Connect the SQC controller to the ruby network
        sqc_cntrl.requestFromSQC = MessageBuffer(ordered=True)
        sqc_cntrl.requestFromSQC.master = ruby_system.network.slave

        sqc_cntrl.probeToSQC = MessageBuffer(ordered=True)
        sqc_cntrl.probeToSQC.slave = ruby_system.network.master

        sqc_cntrl.responseToSQC = MessageBuffer(ordered=True)
        sqc_cntrl.responseToSQC.slave = ruby_system.network.master

        sqc_cntrl.mandatoryQueue = MessageBuffer()

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    for i in xrange(options.num_scalar_cache):
        scalar_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        scalar_cntrl.create(options, ruby_system, system)

        exec('ruby_system.scalar_cntrl%d = scalar_cntrl' % i)

        cpu_sequencers.append(scalar_cntrl.sequencer)

        scalar_cntrl.requestFromSQC = MessageBuffer(ordered=True)
        scalar_cntrl.requestFromSQC.master = ruby_system.network.slave

        scalar_cntrl.probeToSQC = MessageBuffer(ordered=True)
        scalar_cntrl.probeToSQC.slave = ruby_system.network.master

        scalar_cntrl.responseToSQC = MessageBuffer(ordered=True)
        scalar_cntrl.responseToSQC.slave = ruby_system.network.master

        scalar_cntrl.mandatoryQueue = \
            MessageBuffer(buffer_size=options.scalar_buffer_size)

        gpuCluster.add(scalar_cntrl)

    for i in xrange(options.num_cp):

        tcp_ID = options.num_compute_units + i
        sqc_ID = options.num_sqc + i

        tcp_cntrl = TCPCntrl(TCC_select_num_bits=TCC_bits,
                             issue_latency=1,
                             number_of_TBEs=2560)
        # TBEs set to max outstanding requests
        tcp_cntrl.createCP(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = options.noL1
        tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
        tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency

        exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % tcp_ID)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.sequencer)
        tcp_cntrl_nodes.append(tcp_cntrl)

        # Connect the CP (TCP) controllers to the ruby network
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.requestFromTCP.master = ruby_system.network.slave

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseFromTCP.master = ruby_system.network.slave

        tcp_cntrl.unblockFromCore = MessageBuffer(ordered=True)
        tcp_cntrl.unblockFromCore.master = ruby_system.network.slave

        tcp_cntrl.probeToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.probeToTCP.slave = ruby_system.network.master

        tcp_cntrl.responseToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseToTCP.slave = ruby_system.network.master

        tcp_cntrl.mandatoryQueue = MessageBuffer()

        gpuCluster.add(tcp_cntrl)

        sqc_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % sqc_ID)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    for i in range(options.num_tccs):

        tcc_cntrl = TCCCntrl(l2_response_latency=options.TCC_latency)
        tcc_cntrl.create(options, ruby_system, system)
        tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency
        tcc_cntrl.l2_response_latency = options.TCC_latency
        tcc_cntrl_nodes.append(tcc_cntrl)
        tcc_cntrl.WB = options.WB_L2
        tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units
        # the number_of_TBEs is inclusive of TBEs below

        # Connect the TCC controllers to the ruby network
        tcc_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcc_cntrl.requestFromTCP.slave = ruby_system.network.master

        tcc_cntrl.responseToCore = MessageBuffer(ordered=True)
        tcc_cntrl.responseToCore.master = ruby_system.network.slave

        tcc_cntrl.probeFromNB = MessageBuffer()
        tcc_cntrl.probeFromNB.slave = ruby_system.network.master

        tcc_cntrl.responseFromNB = MessageBuffer()
        tcc_cntrl.responseFromNB.slave = ruby_system.network.master

        tcc_cntrl.requestToNB = MessageBuffer(ordered=True)
        tcc_cntrl.requestToNB.master = ruby_system.network.slave

        tcc_cntrl.responseToNB = MessageBuffer()
        tcc_cntrl.responseToNB.master = ruby_system.network.slave

        tcc_cntrl.unblockToNB = MessageBuffer()
        tcc_cntrl.unblockToNB.master = ruby_system.network.slave

        tcc_cntrl.triggerQueue = MessageBuffer(ordered=True)

        exec("ruby_system.tcc_cntrl%d = tcc_cntrl" % i)

        # connect all of the wire buffers between L3 and dirs up
        # TCC cntrls added to the GPU cluster
        gpuCluster.add(tcc_cntrl)

    for i, dma_device in enumerate(dma_devices):
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system)
        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   ruby_system=ruby_system)
        exec('system.dma_cntrl%d = dma_cntrl' % i)
        if dma_device.type == 'MemTest':
            exec('system.dma_cntrl%d.dma_sequencer.slave = dma_devices.test' %
                 i)
        else:
            exec('system.dma_cntrl%d.dma_sequencer.slave = dma_device.dma' % i)
        dma_cntrl.requestToDir = MessageBuffer(buffer_size=0)
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.responseFromDir = MessageBuffer(buffer_size=0)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.mandatoryQueue = MessageBuffer(buffer_size=0)
        gpuCluster.add(dma_cntrl)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

    ruby_system.network.number_of_virtual_networks = 11

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
Esempio n. 6
0
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):

    if buildEnv['PROTOCOL'] != 'MOESI_hammer':
        panic("This script requires the MOESI_hammer protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes must be
    # listed before the directory nodes and directory nodes before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size=options.l1i_size,
                            assoc=options.l1i_assoc,
                            start_index_bit=block_size_bits,
                            is_icache=True)
        l1d_cache = L1Cache(size=options.l1d_size,
                            assoc=options.l1d_assoc,
                            start_index_bit=block_size_bits)
        l2_cache = L2Cache(size=options.l2_size,
                           assoc=options.l2_assoc,
                           start_index_bit=block_size_bits)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache,
                                      L1Dcache=l1d_cache, L2cache=l2_cache,
                                      no_mig_atomic=not \
                                      options.allow_atomic_migration,
                                      send_evictions=send_evicts(options),
                                      transitions_per_cycle=options.ports,
                                      clk_domain=clk_domain,
                                      ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i,
                                icache=l1i_cache,
                                dcache=l1d_cache,
                                clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        if options.recycle_latency:
            l1_cntrl.recycle_latency = options.recycle_latency

        exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controller and the network
        # Connect the buffers from the controller to network
        l1_cntrl.requestFromCache = MessageBuffer()
        l1_cntrl.requestFromCache.master = ruby_system.network.slave
        l1_cntrl.responseFromCache = MessageBuffer()
        l1_cntrl.responseFromCache.master = ruby_system.network.slave
        l1_cntrl.unblockFromCache = MessageBuffer()
        l1_cntrl.unblockFromCache.master = ruby_system.network.slave

        l1_cntrl.triggerQueue = MessageBuffer()

        # Connect the buffers from the network to the controller
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.forwardToCache = MessageBuffer()
        l1_cntrl.forwardToCache.slave = ruby_system.network.master
        l1_cntrl.responseToCache = MessageBuffer()
        l1_cntrl.responseToCache.slave = ruby_system.network.master

    #
    # determine size and index bits for probe filter
    # By default, the probe filter size is configured to be twice the
    # size of the L2 cache.
    #
    pf_size = MemorySize(options.l2_size)
    pf_size.value = pf_size.value * 2
    dir_bits = int(math.log(options.num_dirs, 2))
    pf_bits = int(math.log(pf_size.value, 2))
    if options.numa_high_bit:
        if options.pf_on or options.dir_on:
            # if numa high bit explicitly set, make sure it does not overlap
            # with the probe filter index
            assert (options.numa_high_bit - dir_bits > pf_bits)

        # set the probe filter start bit to just above the block offset
        pf_start_bit = block_size_bits
    else:
        if dir_bits > 0:
            pf_start_bit = dir_bits + block_size_bits - 1
        else:
            pf_start_bit = block_size_bits

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
        clk_domain=ruby_system.clk_domain, clk_divider=3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        pf = ProbeFilter(size=pf_size, assoc=4, start_index_bit=pf_start_bit)

        dir_cntrl.probeFilter = pf
        dir_cntrl.probe_filter_enabled = options.pf_on
        dir_cntrl.full_bit_dir_enabled = options.dir_on

        if options.recycle_latency:
            dir_cntrl.recycle_latency = options.recycle_latency

        # Connect the directory controller to the network
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)

        dir_cntrl.unblockToDir = MessageBuffer()
        dir_cntrl.unblockToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version=i,
                               ruby_system=ruby_system,
                               slave=dma_port)

        dma_cntrl = DMA_Controller(version=i,
                                   dma_sequencer=dma_seq,
                                   transitions_per_cycle=options.ports,
                                   ruby_system=ruby_system)

        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
        dma_cntrl_nodes.append(dma_cntrl)

        if options.recycle_latency:
            dma_cntrl.recycle_latency = options.recycle_latency

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.mandatoryQueue = MessageBuffer()

    all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version=len(dma_ports),
                                       dma_sequencer=io_seq,
                                       ruby_system=ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.responseFromDir = MessageBuffer(ordered=True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave
        io_controller.mandatoryQueue = MessageBuffer()

        all_cntrls = all_cntrls + [io_controller]
    # Register configuration with filesystem
    else:
        FileSystemConfig.config_filesystem(options)

        for i in xrange(options.num_cpus):
            FileSystemConfig.register_cpu(physical_package_id=0,
                                          core_siblings=[],
                                          core_id=i,
                                          thread_siblings=[])

            FileSystemConfig.register_cache(level=1,
                                            idu_type='Instruction',
                                            size=options.l1i_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1i_assoc,
                                            cpus=[i])
            FileSystemConfig.register_cache(level=1,
                                            idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i])

            FileSystemConfig.register_cache(level=2,
                                            idu_type='Unified',
                                            size=options.l2_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l2_assoc,
                                            cpus=[i])

    ruby_system.network.number_of_virtual_networks = 6
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
Esempio n. 7
0
def create_system(options,
                  full_system,
                  system,
                  piobus=None,
                  dma_ports=[],
                  bootmem=None):

    system.ruby = RubySystem()
    ruby = system.ruby

    # Generate pseudo filesystem
    FileSystemConfig.config_filesystem(system, options)

    # Create the network object
    (network, IntLinkClass, ExtLinkClass, RouterClass, InterfaceClass) = \
        Network.create_network(options, ruby)
    ruby.network = network

    protocol = buildEnv['PROTOCOL']
    exec("from . import %s" % protocol)
    try:
        (cpu_sequencers, dir_cntrls, topology) = \
             eval("%s.create_system(options, full_system, system, dma_ports,\
                                    bootmem, ruby)"
                  % protocol)
    except:
        print("Error: could not create sytem for ruby protocol %s" % protocol)
        raise

    # Create the network topology
    topology.makeTopology(options, network, IntLinkClass, ExtLinkClass,
                          RouterClass)

    # Register the topology elements with faux filesystem (SE mode only)
    if not full_system:
        topology.registerTopology(options)

    # Initialize network based on topology
    Network.init_network(options, network, InterfaceClass)

    # Create a port proxy for connecting the system port. This is
    # independent of the protocol and kept in the protocol-agnostic
    # part (i.e. here).
    sys_port_proxy = RubyPortProxy(ruby_system=ruby)
    if piobus is not None:
        sys_port_proxy.pio_master_port = piobus.slave

    # Give the system port proxy a SimObject parent without creating a
    # full-fledged controller
    system.sys_port_proxy = sys_port_proxy

    # Connect the system port for loading of binaries etc
    system.system_port = system.sys_port_proxy.slave

    setup_memory_controllers(system, ruby, dir_cntrls, options)

    # Connect the cpu sequencers and the piobus
    if piobus != None:
        for cpu_seq in cpu_sequencers:
            cpu_seq.pio_master_port = piobus.slave
            cpu_seq.mem_master_port = piobus.slave

            if buildEnv['TARGET_ISA'] == "x86":
                cpu_seq.pio_slave_port = piobus.master

    ruby.number_of_virtual_networks = ruby.network.number_of_virtual_networks
    ruby._cpu_ports = cpu_sequencers
    ruby.num_of_sequencers = len(cpu_sequencers)

    # Create a backing copy of physical memory in case required
    if options.access_backing_store:
        ruby.access_backing_store = True
        ruby.phys_mem = SimpleMemory(range=system.mem_ranges[0],
                                     in_addr_map=False)
Esempio n. 8
0
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    if buildEnv['PROTOCOL'] != 'MOESI_AMD_Base':
        panic("This script requires the MOESI_AMD_Base protocol.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l3_cntrl_nodes = []
    dir_cntrl_nodes = []

    control_count = 0

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #

    # This is the base crossbar that connects the L3s, Dirs, and cpu
    # Cluster
    mainCluster = Cluster(extBW=512, intBW=512)  # 1 TB/s

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        dir_bits = int(math.log(options.num_dirs, 2))
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    for i in range(options.num_dirs):
        dir_ranges = []
        for r in system.mem_ranges:
            addr_range = m5.objects.AddrRange(r.start,
                                              size=r.size(),
                                              intlvHighBit=numa_bit,
                                              intlvBits=dir_bits,
                                              intlvMatch=i)
            dir_ranges.append(addr_range)

        dir_cntrl = DirCntrl(TCC_select_num_bits=0)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered=True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered=True)

        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

        exec("system.dir_cntrl%d = dir_cntrl" % i)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    # Technically this config can support an odd number of cpus, but the top
    # level config files, such as the ruby_random_tester, will get confused if
    # the number of cpus does not equal the number of sequencers.  Thus make
    # sure that an even number of cpus is specified.
    assert ((options.num_cpus % 2) == 0)

    # For an odd number of CPUs, still create the right number of controllers
    cpuCluster = Cluster(extBW=512, intBW=512)  # 1 TB/s
    for i in range((options.num_cpus + 1) // 2):

        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        exec("system.cp_cntrl%d = cp_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered=True)

        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        for i in range((options.num_cpus + 1) // 2):
            FileSystemConfig.register_cpu(physical_package_id=0,
                                          core_siblings=range(
                                              options.num_cpus),
                                          core_id=i * 2,
                                          thread_siblings=[])

            FileSystemConfig.register_cpu(physical_package_id=0,
                                          core_siblings=range(
                                              options.num_cpus),
                                          core_id=i * 2 + 1,
                                          thread_siblings=[])

            FileSystemConfig.register_cache(level=0,
                                            idu_type='Instruction',
                                            size=options.l1i_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1i_assoc,
                                            cpus=[i * 2, i * 2 + 1])

            FileSystemConfig.register_cache(level=0,
                                            idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i * 2])

            FileSystemConfig.register_cache(level=0,
                                            idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[i * 2 + 1])

            FileSystemConfig.register_cache(level=1,
                                            idu_type='Unified',
                                            size=options.l2_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l2_assoc,
                                            cpus=[i * 2, i * 2 + 1])

        for i in range(options.num_dirs):
            FileSystemConfig.register_cache(
                level=2,
                idu_type='Unified',
                size=options.l3_size,
                line_size=options.cacheline_size,
                assoc=options.l3_assoc,
                cpus=[n for n in range(options.num_cpus)])

    # Assuming no DMA devices
    assert (len(dma_devices) == 0)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)

    ruby_system.network.number_of_virtual_networks = 10

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
Esempio n. 9
0
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system, cpus):
    if buildEnv['PROTOCOL'] != 'GPU_VIPER':
        panic("This script requires the GPU_VIPER protocol to be built.")

    cpu_sequencers = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #

    # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
    # Clusters
    crossbar_bw = None
    mainCluster = None
    cpuCluster = None
    gpuCluster = None

    if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
        #Assuming a 2GHz clock
        crossbar_bw = 16 * options.num_compute_units * options.bw_scalor
        mainCluster = Cluster(intBW = crossbar_bw)
        cpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
        gpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
    else:
        mainCluster = Cluster(intBW = 8) # 16 GB/s
        cpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s
        gpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s


    # Create CPU directory controllers
    dir_cntrl_nodes = \
        construct_dirs(options, system, ruby_system, ruby_system.network)
    for dir_cntrl in dir_cntrl_nodes:
        mainCluster.add(dir_cntrl)


    # Create CPU core pairs
    (cp_sequencers, cp_cntrl_nodes) = \
        construct_corepairs(options, system, ruby_system, ruby_system.network)
    cpu_sequencers.extend(cp_sequencers)
    for cp_cntrl in cp_cntrl_nodes:
        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        for i in range((options.num_cpus + 1) // 2):
            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = \
                                            range(options.num_cpus),
                                          core_id = i*2,
                                          thread_siblings = [])

            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = \
                                            range(options.num_cpus),
                                          core_id = i*2+1,
                                          thread_siblings = [])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Instruction',
                                            size = options.l1i_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1i_assoc,
                                            cpus = [i*2, i*2+1])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i*2])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i*2+1])

            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Unified',
                                            size = options.l2_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l2_assoc,
                                            cpus = [i*2, i*2+1])

        for i in range(options.num_dirs):
            FileSystemConfig.register_cache(level = 2,
                                            idu_type = 'Unified',
                                            size = options.l3_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l3_assoc,
                                            cpus = [n for n in
                                                range(options.num_cpus)])

    # Create TCPs
    (tcp_sequencers, tcp_cntrl_nodes) = \
        construct_tcps(options, system, ruby_system, ruby_system.network)
    cpu_sequencers.extend(tcp_sequencers)
    for tcp_cntrl in tcp_cntrl_nodes:
        gpuCluster.add(tcp_cntrl)

    # Create SQCs
    (sqc_sequencers, sqc_cntrl_nodes) = \
        construct_sqcs(options, system, ruby_system, ruby_system.network)
    cpu_sequencers.extend(sqc_sequencers)
    for sqc_cntrl in sqc_cntrl_nodes:
        gpuCluster.add(sqc_cntrl)

    # Create Scalars
    (scalar_sequencers, scalar_cntrl_nodes) = \
        construct_scalars(options, system, ruby_system, ruby_system.network)
    cpu_sequencers.extend(scalar_sequencers)
    for scalar_cntrl in scalar_cntrl_nodes:
        gpuCluster.add(scalar_cntrl)

    # Create command processors
    (cmdproc_sequencers, cmdproc_cntrl_nodes) = \
        construct_cmdprocs(options, system, ruby_system, ruby_system.network)
    cpu_sequencers.extend(cmdproc_sequencers)
    for cmdproc_cntrl in cmdproc_cntrl_nodes:
        gpuCluster.add(cmdproc_cntrl)

    # Create TCCs
    tcc_cntrl_nodes = \
        construct_tccs(options, system, ruby_system, ruby_system.network)
    for tcc_cntrl in tcc_cntrl_nodes:
        gpuCluster.add(tcc_cntrl)

    for i, dma_device in enumerate(dma_devices):
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system)
        dma_cntrl = DMA_Controller(version=i, dma_sequencer=dma_seq,
                                   ruby_system=ruby_system)
        exec('system.dma_cntrl%d = dma_cntrl' % i)

        # IDE doesn't have a .type but seems like everything else does.
        if not hasattr(dma_device, 'type'):
            exec('system.dma_cntrl%d.dma_sequencer.in_ports = dma_device' % i)
        elif dma_device.type == 'MemTest':
            exec('system.dma_cntrl%d.dma_sequencer.in_ports = dma_devices.test'
                 % i)
        else:
            exec('system.dma_cntrl%d.dma_sequencer.in_ports = dma_device.dma'
                 % i)

        dma_cntrl.requestToDir = MessageBuffer(buffer_size=0)
        dma_cntrl.requestToDir.out_port = ruby_system.network.in_port
        dma_cntrl.responseFromDir = MessageBuffer(buffer_size=0)
        dma_cntrl.responseFromDir.in_port = ruby_system.network.out_port
        dma_cntrl.mandatoryQueue = MessageBuffer(buffer_size = 0)
        gpuCluster.add(dma_cntrl)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

    ruby_system.network.number_of_virtual_networks = 11

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
Esempio n. 10
0
    if options.checker:
        system.cpu[i].addCheckerCpu()

    system.cpu[i].createThreads()

# Redirect filesystem syscalls from src to the first matching dests
chroot = os.path.expanduser(options.chroot)
redirect_paths = [
    RedirectPath(src="/proc", dests=["%s/fs/proc" % m5.options.outdir]),
    RedirectPath(src="/sys", dests=["%s/fs/sys" % m5.options.outdir]),
    RedirectPath(src="/tmp", dests=["%s/fs/tmp" % m5.options.outdir]),
    RedirectPath(src="/", dests=["%s" % chroot])
]
system.redirect_paths = redirect_paths

FileSystemConfig.config_filesystem(options)

if options.ruby:
    Ruby.create_system(options, False, system)
    assert (options.num_cpus == len(system.ruby._cpu_ports))

    system.ruby.clk_domain = SrcClockDomain(
        clock=options.ruby_clock, voltage_domain=system.voltage_domain)
    for i in xrange(np):
        ruby_port = system.ruby._cpu_ports[i]

        # Create the interrupt controller and connect its ports to Ruby
        # Note that the interrupt controller is always present but only
        # in x86 does it have message ports that need to be connected
        system.cpu[i].createInterruptController()
Esempio n. 11
0
    def makeTopology(self, options, full_system, network, IntLink, ExtLink,
                     Router):
        nodes = self.nodes

        num_routers = options.num_cpus
        num_rows = options.mesh_rows

        # default values for link latency and router latency.
        # Can be over-ridden on a per link/router basis
        link_latency = options.link_latency  # used by simple and garnet
        router_latency = options.router_latency  # only used by garnet

        # There must be an evenly divisible number of cntrls to routers
        # Also, obviously the number or rows must be <= the number of routers
        cntrls_per_router, remainder = divmod(len(nodes), num_routers)
        assert (num_rows > 0 and num_rows <= num_routers)
        num_columns = int(num_routers / num_rows)
        assert (num_columns * num_rows == num_routers)

        # Create the routers in the mesh
        routers = [Router(router_id=i, latency = router_latency) \
            for i in range(num_routers)]
        network.routers = routers

        # link counter to set unique link ids
        link_count = 0

        # Add all but the remainder nodes to the list of nodes to be uniformly
        # distributed across the network.
        network_nodes = []
        remainder_nodes = []
        for node_index in xrange(len(nodes)):
            if node_index < (len(nodes) - remainder):
                network_nodes.append(nodes[node_index])
            else:
                remainder_nodes.append(nodes[node_index])

        # Connect each node to the appropriate router
        ext_links = []
        for (i, n) in enumerate(network_nodes):
            cntrl_level, router_id = divmod(i, num_routers)
            assert (cntrl_level < cntrls_per_router)
            ext_links.append(
                ExtLink(link_id=link_count,
                        ext_node=n,
                        int_node=routers[router_id],
                        latency=link_latency))
            link_count += 1

        # Connect the remainding nodes to router 0.  These should only be
        # DMA nodes.
        for (i, node) in enumerate(remainder_nodes):
            assert (node.type == 'DMA_Controller')
            assert (i < remainder)
            ext_links.append(
                ExtLink(link_id=link_count,
                        ext_node=node,
                        int_node=routers[0],
                        latency=link_latency))
            link_count += 1

        network.ext_links = ext_links

        # Create the mesh links.
        int_links = []

        # East output to West input links (weight = 1)
        for row in xrange(num_rows):
            for col in xrange(num_columns):
                if (col + 1 < num_columns):
                    east_out = col + (row * num_columns)
                    west_in = (col + 1) + (row * num_columns)
                    int_links.append(
                        IntLink(link_id=link_count,
                                src_node=routers[east_out],
                                dst_node=routers[west_in],
                                src_outport="East",
                                dst_inport="West",
                                latency=link_latency,
                                weight=1))
                    link_count += 1

        # West output to East input links (weight = 1)
        for row in xrange(num_rows):
            for col in xrange(num_columns):
                if (col + 1 < num_columns):
                    east_in = col + (row * num_columns)
                    west_out = (col + 1) + (row * num_columns)
                    int_links.append(
                        IntLink(link_id=link_count,
                                src_node=routers[west_out],
                                dst_node=routers[east_in],
                                src_outport="West",
                                dst_inport="East",
                                latency=link_latency,
                                weight=1))
                    link_count += 1

        # North output to South input links (weight = 2)
        for col in xrange(num_columns):
            for row in xrange(num_rows):
                if (row + 1 < num_rows):
                    north_out = col + (row * num_columns)
                    south_in = col + ((row + 1) * num_columns)
                    int_links.append(
                        IntLink(link_id=link_count,
                                src_node=routers[north_out],
                                dst_node=routers[south_in],
                                src_outport="North",
                                dst_inport="South",
                                latency=link_latency,
                                weight=2))
                    link_count += 1

        # South output to North input links (weight = 2)
        for col in xrange(num_columns):
            for row in xrange(num_rows):
                if (row + 1 < num_rows):
                    north_in = col + (row * num_columns)
                    south_out = col + ((row + 1) * num_columns)
                    int_links.append(
                        IntLink(link_id=link_count,
                                src_node=routers[south_out],
                                dst_node=routers[north_in],
                                src_outport="South",
                                dst_inport="North",
                                latency=link_latency,
                                weight=2))
                    link_count += 1

        network.int_links = int_links

        # Register nodes with filesystem
        if not full_system:
            for i in xrange(options.num_cpus):
                FileSystemConfig.register_node([i],
                                               MemorySize(options.mem_size) /
                                               options.num_cpus)
Esempio n. 12
0
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    if buildEnv['PROTOCOL'] != 'MOESI_AMD_Base':
        panic("This script requires the MOESI_AMD_Base protocol.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to
    # be consistent with the NetDest list.  Therefore the l1 controller
    # nodes must be listed before the directory nodes and directory nodes
    # before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    l3_cntrl_nodes = []
    dir_cntrl_nodes = []

    control_count = 0

    #
    # Must create the individual controllers before the network to ensure
    # the controller constructors are called before the network constructor
    #

    # This is the base crossbar that connects the L3s, Dirs, and cpu
    # Cluster
    mainCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s

    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        dir_bits = int(math.log(options.num_dirs, 2))
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    for i in range(options.num_dirs):
        dir_ranges = []
        for r in system.mem_ranges:
            addr_range = m5.objects.AddrRange(r.start, size = r.size(),
                                              intlvHighBit = numa_bit,
                                              intlvBits = dir_bits,
                                              intlvMatch = i)
            dir_ranges.append(addr_range)


        dir_cntrl = DirCntrl(TCC_select_num_bits = 0)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered = True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.responseFromMemory = MessageBuffer()

        exec("system.dir_cntrl%d = dir_cntrl" % i)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    # Technically this config can support an odd number of cpus, but the top
    # level config files, such as the ruby_random_tester, will get confused if
    # the number of cpus does not equal the number of sequencers.  Thus make
    # sure that an even number of cpus is specified.
    assert((options.num_cpus % 2) == 0)

    # For an odd number of CPUs, still create the right number of controllers
    cpuCluster = Cluster(extBW = 512, intBW = 512)  # 1 TB/s
    for i in range((options.num_cpus + 1) // 2):

        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        exec("system.cp_cntrl%d = cp_cntrl" % i)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered = True)

        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        FileSystemConfig.config_filesystem(options)
        for i in xrange((options.num_cpus + 1) // 2):
            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings =
                                            xrange(options.num_cpus),
                                          core_id = i*2,
                                          thread_siblings = [])

            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings =
                                            xrange(options.num_cpus),
                                          core_id = i*2+1,
                                          thread_siblings = [])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Instruction',
                                            size = options.l1i_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1i_assoc,
                                            cpus = [i*2, i*2+1])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i*2])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i*2+1])

            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Unified',
                                            size = options.l2_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l2_assoc,
                                            cpus = [i*2, i*2+1])

        for i in range(options.num_dirs):
            FileSystemConfig.register_cache(level = 2,
                                            idu_type = 'Unified',
                                            size = options.l3_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l3_assoc,
                                            cpus = [n for n in
                                                xrange(options.num_cpus)])

    # Assuming no DMA devices
    assert(len(dma_devices) == 0)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)

    ruby_system.network.number_of_virtual_networks = 10

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
Esempio n. 13
0
    print("Switch at instruction count: %d" % cpu_list[0].max_insts_any_thread)

exit_event = m5.simulate(maxtick)

if options.fast_forward:
    if exit_event.getCause() == "a thread reached the max instruction count":
        m5.switchCpus(system, switch_cpu_list)
        print("Switched CPUS @ tick %s" % (m5.curTick()))
        m5.stats.reset()
        exit_event = m5.simulate(maxtick - m5.curTick())
elif options.fast_forward_pseudo_op:
    while exit_event.getCause() == "switchcpu":
        # If we are switching *to* kvm, then the current stats are meaningful
        # Note that we don't do any warmup by default
        if type(switch_cpu_list[0][0]) == FutureCpuClass:
            print("Dumping stats...")
            m5.stats.dump()
        m5.switchCpus(system, switch_cpu_list)
        print("Switched CPUS @ tick %s" % (m5.curTick()))
        m5.stats.reset()
        # This lets us switch back and forth without keeping a counter
        switch_cpu_list = [(x[1], x[0]) for x in switch_cpu_list]
        exit_event = m5.simulate(maxtick - m5.curTick())

print("Ticks:", m5.curTick())
print('Exiting because ', exit_event.getCause())

FileSystemConfig.cleanup_filesystem(options)

sys.exit(exit_event.getCode())
Esempio n. 14
0
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):

    if buildEnv['PROTOCOL'] != 'MESI_Three_Level':
        fatal("This script requires the MESI_Three_Level protocol to be\
               built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    l0_cntrl_nodes = []
    l1_cntrl_nodes = []
    l2_cntrl_nodes = []
    dma_cntrl_nodes = []

    assert (options.num_cpus % options.num_clusters == 0)
    num_cpus_per_cluster = options.num_cpus / options.num_clusters

    assert (options.num_l2caches % options.num_clusters == 0)
    num_l2caches_per_cluster = options.num_l2caches / options.num_clusters

    l2_bits = int(math.log(num_l2caches_per_cluster, 2))
    block_size_bits = int(math.log(options.cacheline_size, 2))
    l2_index_start = block_size_bits + l2_bits

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    for i in range(options.num_clusters):
        for j in range(num_cpus_per_cluster):
            #
            # First create the Ruby objects associated with this cpu
            #
            l0i_cache = L0Cache(size = '4096B', assoc = 1, is_icache = True,
                start_index_bit = block_size_bits,
                replacement_policy = LRUReplacementPolicy())

            l0d_cache = L0Cache(size = '4096B', assoc = 1, is_icache = False,
                start_index_bit = block_size_bits,
                replacement_policy = LRUReplacementPolicy())

            # the ruby random tester reuses num_cpus to specify the
            # number of cpu ports connected to the tester object, which
            # is stored in system.cpu. because there is only ever one
            # tester object, num_cpus is not necessarily equal to the
            # size of system.cpu; therefore if len(system.cpu) == 1
            # we use system.cpu[0] to set the clk_domain, thereby ensuring
            # we don't index off the end of the cpu list.
            if len(system.cpu) == 1:
                clk_domain = system.cpu[0].clk_domain
            else:
                clk_domain = system.cpu[i].clk_domain

            l0_cntrl = L0Cache_Controller(
                   version = i * num_cpus_per_cluster + j, Icache = l0i_cache,
                   Dcache = l0d_cache, send_evictions = send_evicts(options),
                   clk_domain = clk_domain, ruby_system = ruby_system)

            cpu_seq = RubySequencer(version = i * num_cpus_per_cluster + j,
                                    icache = l0i_cache,
                                    clk_domain = clk_domain,
                                    dcache = l0d_cache,
                                    ruby_system = ruby_system)

            l0_cntrl.sequencer = cpu_seq

            l1_cache = L1Cache(size = options.l1d_size,
                               assoc = options.l1d_assoc,
                               start_index_bit = block_size_bits,
                               is_icache = False)

            l1_cntrl = L1Cache_Controller(
                    version = i * num_cpus_per_cluster + j,
                    cache = l1_cache, l2_select_num_bits = l2_bits,
                    cluster_id = i, ruby_system = ruby_system)

            exec("ruby_system.l0_cntrl%d = l0_cntrl"
                 % ( i * num_cpus_per_cluster + j))
            exec("ruby_system.l1_cntrl%d = l1_cntrl"
                 % ( i * num_cpus_per_cluster + j))

            #
            # Add controllers and sequencers to the appropriate lists
            #
            cpu_sequencers.append(cpu_seq)
            l0_cntrl_nodes.append(l0_cntrl)
            l1_cntrl_nodes.append(l1_cntrl)

            # Connect the L0 and L1 controllers
            l0_cntrl.mandatoryQueue = MessageBuffer()
            l0_cntrl.bufferToL1 = MessageBuffer(ordered = True)
            l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1
            l0_cntrl.bufferFromL1 = MessageBuffer(ordered = True)
            l1_cntrl.bufferToL0 = l0_cntrl.bufferFromL1

            # Connect the L1 controllers and the network
            l1_cntrl.requestToL2 = MessageBuffer()
            l1_cntrl.requestToL2.master = ruby_system.network.slave
            l1_cntrl.responseToL2 = MessageBuffer()
            l1_cntrl.responseToL2.master = ruby_system.network.slave
            l1_cntrl.unblockToL2 = MessageBuffer()
            l1_cntrl.unblockToL2.master = ruby_system.network.slave

            l1_cntrl.requestFromL2 = MessageBuffer()
            l1_cntrl.requestFromL2.slave = ruby_system.network.master
            l1_cntrl.responseFromL2 = MessageBuffer()
            l1_cntrl.responseFromL2.slave = ruby_system.network.master


        for j in range(num_l2caches_per_cluster):
            l2_cache = L2Cache(size = options.l2_size,
                               assoc = options.l2_assoc,
                               start_index_bit = l2_index_start)

            l2_cntrl = L2Cache_Controller(
                        version = i * num_l2caches_per_cluster + j,
                        L2cache = l2_cache, cluster_id = i,
                        transitions_per_cycle = options.ports,
                        ruby_system = ruby_system)

            exec("ruby_system.l2_cntrl%d = l2_cntrl"
                 % (i * num_l2caches_per_cluster + j))
            l2_cntrl_nodes.append(l2_cntrl)

            # Connect the L2 controllers and the network
            l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
            l2_cntrl.DirRequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
            l2_cntrl.L1RequestFromL2Cache.master = ruby_system.network.slave
            l2_cntrl.responseFromL2Cache = MessageBuffer()
            l2_cntrl.responseFromL2Cache.master = ruby_system.network.slave

            l2_cntrl.unblockToL2Cache = MessageBuffer()
            l2_cntrl.unblockToL2Cache.slave = ruby_system.network.master
            l2_cntrl.L1RequestToL2Cache = MessageBuffer()
            l2_cntrl.L1RequestToL2Cache.slave = ruby_system.network.master
            l2_cntrl.responseToL2Cache = MessageBuffer()
            l2_cntrl.responseToL2Cache.slave = ruby_system.network.master

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
            clk_domain = ruby_system.clk_domain, clk_divider = 3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        # Connect the directory controllers and the network
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromMemory = MessageBuffer()

    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version = i, ruby_system = ruby_system)

        dma_cntrl = DMA_Controller(version = i,
                                   dma_sequencer = dma_seq,
                                   transitions_per_cycle = options.ports,
                                   ruby_system = ruby_system)

        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
        exec("ruby_system.dma_cntrl%d.dma_sequencer.slave = dma_port" % i)
        dma_cntrl_nodes.append(dma_cntrl)

        # Connect the dma controller to the network
        dma_cntrl.mandatoryQueue = MessageBuffer()
        dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave

    all_cntrls = l0_cntrl_nodes + \
                 l1_cntrl_nodes + \
                 l2_cntrl_nodes + \
                 dir_cntrl_nodes + \
                 dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version = len(dma_ports),
                                       dma_sequencer = io_seq,
                                       ruby_system = ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.mandatoryQueue = MessageBuffer()
        io_controller.responseFromDir = MessageBuffer(ordered = True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave

        all_cntrls = all_cntrls + [io_controller]
    # Register configuration with filesystem
    else:
        FileSystemConfig.config_filesystem(options)

        for i in xrange(options.num_clusters):
            for j in xrange(num_cpus_per_cluster):
                FileSystemConfig.register_cpu(physical_package_id = 0,
                                              core_siblings = xrange(options.num_cpus),
                                              core_id = i*num_cpus_per_cluster+j,
                                              thread_siblings = [])

                FileSystemConfig.register_cache(level = 0,
                                                idu_type = 'Instruction',
                                                size = '4096B',
                                                line_size = options.cacheline_size,
                                                assoc = 1,
                                                cpus = [i*num_cpus_per_cluster+j])
                FileSystemConfig.register_cache(level = 0,
                                                idu_type = 'Data',
                                                size = '4096B',
                                                line_size = options.cacheline_size,
                                                assoc = 1,
                                                cpus = [i*num_cpus_per_cluster+j])

                FileSystemConfig.register_cache(level = 1,
                                                idu_type = 'Unified',
                                                size = options.l1d_size,
                                                line_size = options.cacheline_size,
                                                assoc = options.l1d_assoc,
                                                cpus = [i*num_cpus_per_cluster+j])

            FileSystemConfig.register_cache(level = 2,
                                            idu_type = 'Unified',
                                            size = str(MemorySize(options.l2_size) * \
                                                   num_l2caches_per_cluster)+'B',
                                            line_size = options.cacheline_size,
                                            assoc = options.l2_assoc,
                                            cpus = [n for n in xrange(i*num_cpus_per_cluster, \
                                                                     (i+1)*num_cpus_per_cluster)])

    ruby_system.network.number_of_virtual_networks = 3
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)
Esempio n. 15
0
def create_system(options, full_system, system, piobus = None, dma_ports = [],
                  bootmem=None):

    system.ruby = RubySystem()
    ruby = system.ruby

    # Generate pseudo filesystem
    FileSystemConfig.config_filesystem(system, options)

    # Create the network object
    (network, IntLinkClass, ExtLinkClass, RouterClass, InterfaceClass) = \
        Network.create_network(options, ruby)
    ruby.network = network

    protocol = buildEnv['PROTOCOL']
    exec("from . import %s" % protocol)
    try:
        (cpu_sequencers, dir_cntrls, topology) = \
             eval("%s.create_system(options, full_system, system, dma_ports,\
                                    bootmem, ruby)"
                  % protocol)
    except:
        print("Error: could not create sytem for ruby protocol %s" % protocol)
        raise

    # Create the network topology
    topology.makeTopology(options, network, IntLinkClass, ExtLinkClass,
            RouterClass)

    # Register the topology elements with faux filesystem (SE mode only)
    if not full_system:
        topology.registerTopology(options)


    # Initialize network based on topology
    Network.init_network(options, network, InterfaceClass)

    # Create a port proxy for connecting the system port. This is
    # independent of the protocol and kept in the protocol-agnostic
    # part (i.e. here).
    sys_port_proxy = RubyPortProxy(ruby_system = ruby)
    if piobus is not None:
        sys_port_proxy.pio_master_port = piobus.slave

    # Give the system port proxy a SimObject parent without creating a
    # full-fledged controller
    system.sys_port_proxy = sys_port_proxy

    # Connect the system port for loading of binaries etc
    system.system_port = system.sys_port_proxy.slave

    setup_memory_controllers(system, ruby, dir_cntrls, options)

    # Connect the cpu sequencers and the piobus
    if piobus != None:
        for cpu_seq in cpu_sequencers:
            cpu_seq.pio_master_port = piobus.slave
            cpu_seq.mem_master_port = piobus.slave

            if buildEnv['TARGET_ISA'] == "x86":
                cpu_seq.pio_slave_port = piobus.master

    ruby.number_of_virtual_networks = ruby.network.number_of_virtual_networks
    ruby._cpu_ports = cpu_sequencers
    ruby.num_of_sequencers = len(cpu_sequencers)

    # Create a backing copy of physical memory in case required
    if options.access_backing_store:
        ruby.access_backing_store = True
        ruby.phys_mem = SimpleMemory(range=system.mem_ranges[0],
                                     in_addr_map=False)
Esempio n. 16
0
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):

    if buildEnv['PROTOCOL'] != 'MOESI_hammer':
        panic("This script requires the MOESI_hammer protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes must be
    # listed before the directory nodes and directory nodes before dma nodes, etc.
    #
    l1_cntrl_nodes = []
    dma_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #
    block_size_bits = int(math.log(options.cacheline_size, 2))

    for i in range(options.num_cpus):
        #
        # First create the Ruby objects associated with this cpu
        #
        l1i_cache = L1Cache(size = options.l1i_size,
                            assoc = options.l1i_assoc,
                            start_index_bit = block_size_bits,
                            is_icache = True)
        l1d_cache = L1Cache(size = options.l1d_size,
                            assoc = options.l1d_assoc,
                            start_index_bit = block_size_bits)
        l2_cache = L2Cache(size = options.l2_size,
                           assoc = options.l2_assoc,
                           start_index_bit = block_size_bits)

        # the ruby random tester reuses num_cpus to specify the
        # number of cpu ports connected to the tester object, which
        # is stored in system.cpu. because there is only ever one
        # tester object, num_cpus is not necessarily equal to the
        # size of system.cpu; therefore if len(system.cpu) == 1
        # we use system.cpu[0] to set the clk_domain, thereby ensuring
        # we don't index off the end of the cpu list.
        if len(system.cpu) == 1:
            clk_domain = system.cpu[0].clk_domain
        else:
            clk_domain = system.cpu[i].clk_domain

        l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache,
                                      L1Dcache=l1d_cache, L2cache=l2_cache,
                                      no_mig_atomic=not \
                                      options.allow_atomic_migration,
                                      send_evictions=send_evicts(options),
                                      transitions_per_cycle=options.ports,
                                      clk_domain=clk_domain,
                                      ruby_system=ruby_system)

        cpu_seq = RubySequencer(version=i, icache=l1i_cache,
                                dcache=l1d_cache,clk_domain=clk_domain,
                                ruby_system=ruby_system)

        l1_cntrl.sequencer = cpu_seq
        if options.recycle_latency:
            l1_cntrl.recycle_latency = options.recycle_latency

        exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)

        # Add controllers and sequencers to the appropriate lists
        cpu_sequencers.append(cpu_seq)
        l1_cntrl_nodes.append(l1_cntrl)

        # Connect the L1 controller and the network
        # Connect the buffers from the controller to network
        l1_cntrl.requestFromCache = MessageBuffer()
        l1_cntrl.requestFromCache.master = ruby_system.network.slave
        l1_cntrl.responseFromCache = MessageBuffer()
        l1_cntrl.responseFromCache.master = ruby_system.network.slave
        l1_cntrl.unblockFromCache = MessageBuffer()
        l1_cntrl.unblockFromCache.master = ruby_system.network.slave

        l1_cntrl.triggerQueue = MessageBuffer()

        # Connect the buffers from the network to the controller
        l1_cntrl.mandatoryQueue = MessageBuffer()
        l1_cntrl.forwardToCache = MessageBuffer()
        l1_cntrl.forwardToCache.slave = ruby_system.network.master
        l1_cntrl.responseToCache = MessageBuffer()
        l1_cntrl.responseToCache.slave = ruby_system.network.master


    #
    # determine size and index bits for probe filter
    # By default, the probe filter size is configured to be twice the
    # size of the L2 cache.
    #
    pf_size = MemorySize(options.l2_size)
    pf_size.value = pf_size.value * 2
    dir_bits = int(math.log(options.num_dirs, 2))
    pf_bits = int(math.log(pf_size.value, 2))
    if options.numa_high_bit:
        if options.pf_on or options.dir_on:
            # if numa high bit explicitly set, make sure it does not overlap
            # with the probe filter index
            assert(options.numa_high_bit - dir_bits > pf_bits)

        # set the probe filter start bit to just above the block offset
        pf_start_bit = block_size_bits
    else:
        if dir_bits > 0:
            pf_start_bit = dir_bits + block_size_bits - 1
        else:
            pf_start_bit = block_size_bits

    # Run each of the ruby memory controllers at a ratio of the frequency of
    # the ruby system
    # clk_divider value is a fix to pass regression.
    ruby_system.memctrl_clk_domain = DerivedClockDomain(
                                          clk_domain=ruby_system.clk_domain,
                                          clk_divider=3)

    mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
        options, bootmem, ruby_system, system)
    dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
    if rom_dir_cntrl_node is not None:
        dir_cntrl_nodes.append(rom_dir_cntrl_node)
    for dir_cntrl in dir_cntrl_nodes:
        pf = ProbeFilter(size = pf_size, assoc = 4,
                         start_index_bit = pf_start_bit)

        dir_cntrl.probeFilter = pf
        dir_cntrl.probe_filter_enabled = options.pf_on
        dir_cntrl.full_bit_dir_enabled = options.dir_on

        if options.recycle_latency:
            dir_cntrl.recycle_latency = options.recycle_latency

        # Connect the directory controller to the network
        dir_cntrl.forwardFromDir = MessageBuffer()
        dir_cntrl.forwardFromDir.master = ruby_system.network.slave
        dir_cntrl.responseFromDir = MessageBuffer()
        dir_cntrl.responseFromDir.master = ruby_system.network.slave
        dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered = True)
        dir_cntrl.dmaResponseFromDir.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered = True)

        dir_cntrl.unblockToDir = MessageBuffer()
        dir_cntrl.unblockToDir.slave = ruby_system.network.master
        dir_cntrl.responseToDir = MessageBuffer()
        dir_cntrl.responseToDir.slave = ruby_system.network.master
        dir_cntrl.requestToDir = MessageBuffer()
        dir_cntrl.requestToDir.slave = ruby_system.network.master
        dir_cntrl.dmaRequestToDir = MessageBuffer(ordered = True)
        dir_cntrl.dmaRequestToDir.slave = ruby_system.network.master
        dir_cntrl.responseFromMemory = MessageBuffer()


    for i, dma_port in enumerate(dma_ports):
        #
        # Create the Ruby objects associated with the dma controller
        #
        dma_seq = DMASequencer(version = i,
                               ruby_system = ruby_system,
                               slave = dma_port)

        dma_cntrl = DMA_Controller(version = i,
                                   dma_sequencer = dma_seq,
                                   transitions_per_cycle = options.ports,
                                   ruby_system = ruby_system)

        exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
        dma_cntrl_nodes.append(dma_cntrl)

        if options.recycle_latency:
            dma_cntrl.recycle_latency = options.recycle_latency

        # Connect the dma controller to the network
        dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.requestToDir = MessageBuffer()
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.mandatoryQueue = MessageBuffer()

    all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes

    # Create the io controller and the sequencer
    if full_system:
        io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
        ruby_system._io_port = io_seq
        io_controller = DMA_Controller(version = len(dma_ports),
                                       dma_sequencer = io_seq,
                                       ruby_system = ruby_system)
        ruby_system.io_controller = io_controller

        # Connect the dma controller to the network
        io_controller.responseFromDir = MessageBuffer(ordered = True)
        io_controller.responseFromDir.slave = ruby_system.network.master
        io_controller.requestToDir = MessageBuffer()
        io_controller.requestToDir.master = ruby_system.network.slave
        io_controller.mandatoryQueue = MessageBuffer()

        all_cntrls = all_cntrls + [io_controller]
    # Register configuration with filesystem
    else:
        for i in xrange(options.num_cpus):
            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = [],
                                          core_id = i,
                                          thread_siblings = [])

            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Instruction',
                                            size = options.l1i_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1i_assoc,
                                            cpus = [i])
            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [i])

            FileSystemConfig.register_cache(level = 2,
                                            idu_type = 'Unified',
                                            size = options.l2_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l2_assoc,
                                            cpus = [i])

    ruby_system.network.number_of_virtual_networks = 6
    topology = create_topology(all_cntrls, options)
    return (cpu_sequencers, mem_dir_cntrl_nodes, topology)