}) backend = comp_memory.setSubComponent("backend", "memHierarchy.simpleMem") backend.addParams({ "mem_size" : "1024MiB", }) comp_tracer = sst.Component("tracer", "simpleElementExample.simpleTracerComponent") comp_tracer.addParams({ "clock" : "2 Ghz", "debug" : "8", "statistics" : "1", "pageSize" : "4096", "accessLatencyBins" : "10", "tracePrefix" : "test_simpleTracerComponent_2_mem_ref_trace.txt", "statsPrefix" : "test_simpleTracerComponent_2_mem_ref_stats.txt", }) # define the simulation links link_cpu_l1cache = sst.Link("link_cpu_l1cache") link_cpu_l1cache.connect((comp_cpu, "mem_link", "100ps"),(comp_l1cache, "high_network_0", "100ps")) link_l1cache_l2cache = sst.Link("link_l1cache_l2cache") link_l1cache_l2cache.connect((comp_l1cache, "low_network_0", "100ps"), (comp_l2cache, "high_network_0", "100ps")) link_l2cache_tracer = sst.Link("link_l2cache_tracer") link_l2cache_tracer.connect((comp_l2cache, "low_network_0", "100ps"), (comp_tracer, "northBus", "100ps")) link_tracer_mem = sst.Link("link_tracer_mem") link_tracer_mem.connect((comp_tracer, "southBus", "100ps"), (comp_memory, "direct_link", "100ps"))
"clock": "1GHz", "backend.mem_size": "512MiB", "backend.access_time": "100 ns", "backend.system_ini": "system.ini", "backend.device_ini": "DDR3_micron_32M_8B_x4_sg125.ini", "backend": "memHierarchy.dramsim" }) # Enable statistics sst.setStatisticLoadLevel(7) sst.setStatisticOutput("sst.statOutputConsole") sst.enableAllStatisticsForComponentType("memHierarchy.Cache") sst.enableAllStatisticsForComponentType("memHierarchy.MemController") # Define the simulation links link_cpu0_l1cache_link = sst.Link("link_cpu0_l1cache_link") link_cpu0_l1cache_link.connect((comp_cpu0, "mem_link", "1000ps"), (comp_c0_l1cache, "high_network_0", "1000ps")) link_c0_l1_l2_link = sst.Link("link_c0_l1_l2_link") link_c0_l1_l2_link.connect((comp_c0_l1cache, "low_network_0", "1000ps"), (comp_bus, "high_network_0", "10000ps")) link_cpu1_l1cache_link = sst.Link("link_cpu1_l1cache_link") link_cpu1_l1cache_link.connect((comp_cpu1, "mem_link", "1000ps"), (comp_c1_l1cache, "high_network_0", "1000ps")) link_c1_l1_l2_link = sst.Link("link_c1_l1_l2_link") link_c1_l1_l2_link.connect((comp_c1_l1cache, "low_network_0", "1000ps"), (comp_bus, "high_network_1", "10000ps")) link_bus_l2cache = sst.Link("link_bus_l2cache") link_bus_l2cache.connect((comp_bus, "low_network_0", "10000ps"), (comp_l2cache, "high_network_0", "1000ps")) link_mem_bus_link = sst.Link("link_mem_bus_link")
# Receivers in the first slot (sub1_0): one SubCompReceiver per slot index,
# each with all of its statistics enabled.
sub1_0_0 = sub1_0.setSubComponent("mySubCompSlot", "coreTestElement.SubCompReceiver",0)
sub1_0_0.enableAllStatistics()
sub1_0_1 = sub1_0.setSubComponent("mySubCompSlot", "coreTestElement.SubCompReceiver",1)
sub1_0_1.enableAllStatistics()

# Second slot of loader1, populated with the same pair of receivers.
sub1_1 = loader1.setSubComponent("mySubComp", "coreTestElement.SubCompSlot",1)
sub1_1_0 = sub1_1.setSubComponent("mySubCompSlot", "coreTestElement.SubCompReceiver",0)
sub1_1_0.enableAllStatistics()
sub1_1_1 = sub1_1.setSubComponent("mySubCompSlot", "coreTestElement.SubCompReceiver",1)
sub1_1_1.enableAllStatistics()

# Set up links
# Each sub0_<i>_<j> "sendPort" is wired to the "recvPort" of the receiver
# with the same indices (sub1_<i>_<j>), with 5ns latency on both ends.
link0_0 = sst.Link("myLink0_0")
link0_0.connect((sub0_0_0, "sendPort", "5ns"), (sub1_0_0, "recvPort", "5ns"))
link0_1 = sst.Link("myLink0_1")
link0_1.connect((sub0_0_1, "sendPort", "5ns"), (sub1_0_1, "recvPort", "5ns"))
link1_0 = sst.Link("myLink1_0")
link1_0.connect((sub0_1_0, "sendPort", "5ns"), (sub1_1_0, "recvPort", "5ns"))
link1_1 = sst.Link("myLink1_1")
link1_1.connect((sub0_1_1, "sendPort", "5ns"), (sub1_1_1, "recvPort", "5ns"))

sst.setStatisticLoadLevel(1)
"debug": globalDebug, "debug_level": globalLevel, "L1": "1", "access_latency_cycles": "4", "associativity": "8", "cache_frequency": "1200MHz", "cache_line_size": "64", "cache_size": "32KB", }) l1_cpulink = l1_cache.setSubComponent("cpulink", "memHierarchy.MemLink") l1_memlink = l1_cache.setSubComponent("memlink", "memHierarchy.MemNIC") l1_memlink.addParams({"group": 1}) l1_linkctrl = l1_memlink.setSubComponent("linkcontrol", "shogun.ShogunNIC") corel1link = sst.Link("cpu_l1_link_" + str(cpu_id)) corel1link.connect((comp_cpu, "cache_link", "100ps"), (l1_cpulink, "port", "100ps")) corel1link.setNoCut() l1xbarlink = sst.Link("l1_xbar_link_" + str(cpu_id)) l1xbarlink.connect((l1_linkctrl, "port", "100ps"), (router, "port" + str(next_port), "100ps")) l1xbarlink.setNoCut() next_port = next_port + 1 # Connect L2 caches to the routers num_L2s_per_stack = num_l2 / hbmStacks sub_mems = memoryControllers total_mems = hbmStacks * sub_mems
def build(self, nodeID):
    """Build one tile: a shared L2 (with prefetcher and four-network NIC),
    an L1-to-L2 bus, and two SMT cores hanging off that bus.

    Each core consists of an L1 cache, a ``memHierarchy.multithreadL1`` shim
    and two ``miranda.BaseCPU`` threads running the STREAM generator.

    ``self.next_tile_id`` is advanced by one and ``self.next_core_id`` by two.

    Args:
        nodeID: accepted for interface compatibility; not used by this method.

    Returns:
        A 4-tuple of ``(subcomponent, "rtr_port", mesh_link_latency)`` port
        descriptors for the L2 NIC, in the order req, ack, fwd, data.
    """
    # Capture the tile id once so that component names and the progress
    # message all refer to the tile actually being built (the message used
    # to read self.next_tile_id after it had already been incremented).
    tile_id = self.next_tile_id

    # Id offset between the two hardware threads of one core.
    # NOTE(review): presumably tied to the total core count -- confirm.
    second_thread_offset = 18
    # NOTE(review): 8 looks like the element size in bytes -- confirm.
    bytes_per_element = 8

    def stream_offsets(thread_id):
        # Start addresses of the thread's slice of each STREAM array.
        shift = thread_id * thread_iters * bytes_per_element
        return {
            "start_a": self.base_a + shift,
            "start_b": self.base_b + shift,
            "start_c": self.base_c + shift,
        }

    def build_core(bus_port):
        # Build L1 + SMT shim + two miranda threads and attach the L1 to
        # the given high-network port of the tile bus.
        core_id = self.next_core_id
        sibling_id = core_id + second_thread_offset

        l1 = sst.Component("l1cache_" + str(core_id), "memHierarchy.Cache")
        l1.addParams(l1_cache_params)

        if not quiet:
            print("Creating core " + str(core_id) + " on tile: " + str(tile_id) + "...")

        smt = sst.Component("smt_" + str(core_id), "memHierarchy.multithreadL1")
        smt.addParams({
            "clock": core_clock,
            "requests_per_cycle": 2,
            "responses_per_cycle": 2,
        })

        cpu0 = sst.Component("thread_" + str(core_id), "miranda.BaseCPU")
        cpu1 = sst.Component("thread_" + str(sibling_id), "miranda.BaseCPU")
        cpu0.addParams(cpu_params)
        cpu1.addParams(cpu_params)

        gen0 = cpu0.setSubComponent("generator", "miranda.STREAMBenchGenerator")
        gen1 = cpu1.setSubComponent("generator", "miranda.STREAMBenchGenerator")
        gen0.addParams(gen_params)
        gen1.addParams(gen_params)
        gen0.addParams(stream_offsets(core_id))
        gen1.addParams(stream_offsets(sibling_id))

        # Thread 0
        cpu0_link = sst.Link("smt_cpu_" + str(core_id))
        cpu0_link.connect((cpu0, "cache_link", mesh_link_latency),
                          (smt, "thread0", mesh_link_latency))

        # Thread 1
        cpu1_link = sst.Link("smt_cpu_" + str(sibling_id))
        cpu1_link.connect((cpu1, "cache_link", mesh_link_latency),
                          (smt, "thread1", mesh_link_latency))

        # SMT Shim <-> L1
        smt_l1_link = sst.Link("l1cache_smt_" + str(core_id))
        smt_l1_link.connect((smt, "cache", mesh_link_latency),
                            (l1, "high_network_0", mesh_link_latency))

        cpu0_link.setNoCut()
        cpu1_link.setNoCut()
        smt_l1_link.setNoCut()

        # L1 <-> tile bus
        l1_bus_link = sst.Link("l1cache_link_" + str(core_id))
        l1_bus_link.connect((l2bus, bus_port, mesh_link_latency),
                            (l1, "low_network_0", mesh_link_latency))
        l1_bus_link.setNoCut()

        self.next_core_id = core_id + 1

    # L2
    tileL2cache = sst.Component("l2cache_" + str(tile_id), "memHierarchy.Cache")
    tileL2cache.addParams(l2_cache_params)

    # l2 prefetcher
    l2pre = tileL2cache.setSubComponent("prefetcher", "cassini.StridePrefetcher")
    l2pre.addParams(l2_prefetch_params)

    # l2 bus link
    l2tol1 = tileL2cache.setSubComponent("cpulink", "memHierarchy.MemLink")

    # l2 NIC: one link-control subcomponent per network
    l2NIC = tileL2cache.setSubComponent("memlink", "memHierarchy.MemNICFour")
    l2data = l2NIC.setSubComponent("data", "kingsley.linkcontrol")
    l2req = l2NIC.setSubComponent("req", "kingsley.linkcontrol")
    l2fwd = l2NIC.setSubComponent("fwd", "kingsley.linkcontrol")
    l2ack = l2NIC.setSubComponent("ack", "kingsley.linkcontrol")
    l2NIC.addParams(l2_nic_params)
    l2data.addParams(data_net_params)
    l2req.addParams(ctrl_net_params)
    l2fwd.addParams(ctrl_net_params)
    l2ack.addParams(ctrl_net_params)

    # Bus (from l1s to l2)
    l2bus = sst.Component("l2cachebus_" + str(tile_id), "memHierarchy.Bus")
    l2bus.addParams({"bus_frequency": core_clock})

    l2busLink = sst.Link("l2bus_link_" + str(tile_id))
    l2busLink.connect((l2bus, "low_network_0", mesh_link_latency),
                      (l2tol1, "port", mesh_link_latency))
    l2busLink.setNoCut()

    self.next_tile_id = tile_id + 1

    # Left core, then right core; each takes its own bus port.
    build_core("high_network_0")
    build_core("high_network_1")

    return ((l2req, "rtr_port", mesh_link_latency),
            (l2ack, "rtr_port", mesh_link_latency),
            (l2fwd, "rtr_port", mesh_link_latency),
            (l2data, "rtr_port", mesh_link_latency))
l2cache = sst.Component("c" + str(i) + ".l2cache", "memHierarchy.Cache") l2cache.addParams({ "access_latency_cycles": "9", "mshr_latency_cycles": 2, "cache_frequency": "2Ghz", "replacement_policy": "lru", "coherence_protocol": "MESI", "associativity": "8", "cache_line_size": "64", "cache_size": "32 KB", "debug": "0" }) # Connect link_cpu_l1 = sst.Link("link_cpu_l1_" + str(i)) link_cpu_l1.connect((cpu, "mem_link", "500ps"), (l1cache, "high_network_0", "500ps")) link_l1_l2 = sst.Link("link_l1_l2_" + str(i)) link_l1_l2.connect((l1cache, "low_network_0", "500ps"), (l2cache, "high_network_0", "500ps")) link_l2_bus = sst.Link("link_l2_bus_" + str(i)) link_l2_bus.connect((l2cache, "low_network_0", "1000ps"), (bus, "high_network_" + str(i), "1000ps")) comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") comp_chiprtr.addParams({ "xbar_bw": "1GB/s", "link_bw": "1GB/s",
"backend.collect_stats": "0", "backend.transfer_delay": "0", "backend.threshold": 1, "backend.page_add_strategy": "RAND", "backend.page_replace_strategy": "FIFO", }) # Enable statistics sst.setStatisticLoadLevel(7) sst.setStatisticOutput("sst.statOutputConsole") sst.enableAllStatisticsForComponentType("memHierarchy.Cache") sst.enableAllStatisticsForComponentType("memHierarchy.MemController") sst.enableAllStatisticsForComponentType("memHierarchy.DirectoryController") # Define the simulation links link_c0_l1cache = sst.Link("link_c0_l1cache") link_c0_l1cache.connect((comp_cpu0, "mem_link", "500ps"), (comp_c0_l1cache, "high_network_0", "500ps")) link_c0L1cache_bus = sst.Link("link_c0L1cache_bus") link_c0L1cache_bus.connect((comp_c0_l1cache, "low_network_0", "1000ps"), (comp_n0_bus, "high_network_0", "1000ps")) link_c1_l1cache = sst.Link("link_c1_l1cache") link_c1_l1cache.connect((comp_cpu1, "mem_link", "500ps"), (comp_c1_l1cache, "high_network_0", "500ps")) link_c1L1cache_bus = sst.Link("link_c1L1cache_bus") link_c1L1cache_bus.connect((comp_c1_l1cache, "low_network_0", "1000ps"), (comp_n0_bus, "high_network_1", "1000ps")) link_bus_n0L2cache = sst.Link("link_bus_n0L2cache") link_bus_n0L2cache.connect((comp_n0_bus, "low_network_0", "1000ps"), (comp_n0_l2cache, "high_network_0", "1000ps")) link_n0L2cache_bus = sst.Link("link_n0L2cache_bus")
l1_memlink.addParams({ "node": node, }) l2 = sst.Component("node" + str(node) + "_l2cache_" + str(next_core), "memHierarchy.Cache") l2.addParams(l2_params) l2_cpulink = l2.setSubComponent("cpulink", "memHierarchy.MemLink") l2_memlink = l2.setSubComponent("memlink", "Opal.OpalMemNIC") l2_cpulink.addParams(link_params) l2_memlink.addParams(nic_params) l2_cpulink.addParams({"node": node}) l2_memlink.addParams({"node": node}) l2_memlink.addParams({"group": 1}) arielMMULink = sst.Link("node" + str(node) + "_cpu_mmu_link_" + str(next_core)) MMUCacheLink = sst.Link("node" + str(node) + "_mmu_cache_link_" + str(next_core)) PTWMemLink = sst.Link("node" + str(node) + "_ptw_mem_link_" + str(next_core)) PTWOpalLink = sst.Link("node" + str(node) + "_ptw_opal_" + str(next_core)) ArielOpalLink = sst.Link("node" + str(node) + "_ariel_opal_" + str(next_core)) if next_core < cores // 2: arielMMULink.connect((ariel, "cache_link_%d" % next_core, "300ps"), (mmu, "cpu_to_mmu%d" % next_core, "300ps")) ArielOpalLink.connect( (memmgr, "opal_link_%d" % next_core, "300ps"), (opal, "coreLink%d" % (next_core + node *
def build(self, nodeID, extraKeys):
    """Build the NIC, loopback and per-core Ember endpoints for one node.

    When ``self.detailedModel`` is set and its ``build`` call returns a
    truthy value, a ``thornhill.MemoryHeap`` component is also created and
    every endpoint is linked to it and to the detailed model's thread links.

    Args:
        nodeID: node identifier; used in component/link names and as the
            NIC ``nid``.
        extraKeys: extra parameters applied to the NIC after
            ``self.nicParams``.

    Returns:
        The tuple ``(nic, "rtr", sst.merlin._params["link_lat"])``.
    """
    nic = sst.Component("nic" + str(nodeID), "firefly.nic")
    nic.addParams(self.nicParams)
    nic.addParams(extraKeys)
    nic.addParam("nid", nodeID)
    retval = (nic, "rtr", sst.merlin._params["link_lat"])

    built = False
    if self.detailedModel:
        built = self.detailedModel.build(nodeID, self.numCores)

    memory = None
    if built:
        nic.addLink(self.detailedModel.getNicLink(), "detailed", "1ps")
        memory = sst.Component("memory" + str(nodeID), "thornhill.MemoryHeap")
        memory.addParam("nid", nodeID)

    loopBack = sst.Component("loopBack" + str(nodeID), "firefly.loopBack")
    loopBack.addParam("numCores", self.numCores)

    # Create a motifLog only for one core of the desired node(s)
    logCreatedforFirstCore = False
    motifLogRequested = ('motifLog' in self.driverParams
                         and self.driverParams['motifLog'] != '')

    # range() instead of xrange() so the script runs on Python 2 and 3.
    for x in range(self.numCores):
        ep = sst.Component(
            "nic" + str(nodeID) + "core" + str(x) + "_EmberEP",
            "ember.EmberEngine")
        ep.addParams(self.motifs)

        if built:
            for cpuNum, link in enumerate(self.detailedModel.getThreadLinks(x)):
                ep.addLink(link, "detailed" + str(cpuNum), "1ps")

        # Create a motif log only for the desired list of nodes (endpoints);
        # every other endpoint gets the driver params without 'motifLog'.
        if not motifLogRequested:
            ep.addParams(self.driverParams)
        else:
            paramsWithoutLog = copy.copy(self.driverParams)
            del paramsWithoutLog['motifLog']
            if self.motifLogNodes:
                for logNode in self.motifLogNodes:
                    if nodeID == int(logNode) and not logCreatedforFirstCore:
                        logCreatedforFirstCore = True
                        ep.addParams(self.driverParams)
                    else:
                        ep.addParams(paramsWithoutLog)
            else:
                ep.addParams(paramsWithoutLog)

        for statNode in self.statNodes:
            if nodeID == statNode:
                # Parenthesised single-argument print is valid on Python 2 and 3.
                print("printStats for node {0}".format(statNode))
                ep.addParams({'motif1.printStats': 1})

        ep.addParams({'hermesParams.netId': nodeID})
        ep.addParams({'hermesParams.netMapId': self.nidMap[nodeID]})
        ep.addParams({'hermesParams.netMapSize': self.numNids})
        ep.addParams({'hermesParams.coreId': x})

        nicLink = sst.Link("nic" + str(nodeID) + "core" + str(x) + "_Link")
        nicLink.setNoCut()

        loopLink = sst.Link("loop" + str(nodeID) + "core" + str(x) + "_Link")
        loopLink.setNoCut()

        ep.addLink(nicLink, "nic", "1ns")
        nic.addLink(nicLink, "core" + str(x), "1ns")

        ep.addLink(loopLink, "loop", "1ns")
        loopBack.addLink(loopLink, "core" + str(x), "1ns")

        if built:
            memoryLink = sst.Link("memory" + str(nodeID) + "core" + str(x) + "_Link")
            memoryLink.setNoCut()

            ep.addLink(memoryLink, "memoryHeap", "0 ps")
            memory.addLink(memoryLink, "detailed" + str(x), "0 ns")

    return retval
"debug_level": DEBUG_LEVEL, "clock": "1GHz", "addr_range_end": 512 * 1024 * 1024 - 1, }) mem_nic = memctrl.setSubComponent("cpulink", "memHierarchy.MemNIC") mem_nic.addParams({"group": 2, "network_bw": "25GB/s"}) #mem_nic.addParams(debug_params) memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") memory.addParams({"access_time": "100 ns", "mem_size": "512MiB"}) # Enable statistics sst.setStatisticLoadLevel(7) sst.setStatisticOutput("sst.statOutputConsole") for a in componentlist: sst.enableAllStatisticsForComponentType(a) # Define the simulation links # cpu/cpu_nic # | # l1/l1_nic - chiprtr - mem_nic/mem # link_cpu_rtr = sst.Link("link_cpu") link_cpu_rtr.connect((cpu_nic, "port", "1000ps"), (chiprtr, "port0", "1000ps")) link_l1_rtr = sst.Link("link_l1") link_l1_rtr.connect((l1_nic, "port", '1000ps'), (chiprtr, "port1", "1000ps")) link_mem_rtr = sst.Link("link_mem") link_mem_rtr.connect((mem_nic, "port", "1000ps"), (chiprtr, "port2", "1000ps"))
"pagesize" : pagesize, "backend" : "memHierarchy.cramsim", "backend.max_outstanding_requests" : 1024, "request_width" : cacheLineSize, "backend.mem_size" : "%dGiB"%memSize, }) memory.enableAllStatistics() membus = sst.Component("membus", "memHierarchy.Bus") membus.addParams({ "bus_frequency" : cacheFrequency, }) link_dir_cramsim_link = sst.Link("link_dir_cramsim_link") link_dir_cramsim_link.connect( (memory, "cube_link", busLat_slow), (comp_memhBridge,"cpuLink", busLat_slow) ) # memhBridge(=TxnGen) <-> Memory Controller memHLink = sst.Link("memHLink_1") memHLink.connect( (comp_memhBridge, "memLink", g_params["clockCycle"]), (comp_controller0, "txngenLink", g_params["clockCycle"]) ) # Controller <-> Dimm cmdLink = sst.Link("cmdLink_1") cmdLink.connect( (comp_controller0, "memLink", g_params["clockCycle"]), (comp_dimm0, "ctrlLink", g_params["clockCycle"]) ) # Bus to L3 and L3 <-> MM BusL3Link = sst.Link("bus_L3") BusL3Link.connect((membus, "low_network_0", busLat_slow), (l3, "high_network_0", busLat_slow))
def getLink(name1, name2):
    """Return the link registered for the (name1, name2) pair.

    The link is looked up in the module-level ``links`` mapping under the
    key ``"link.<name1>:<name2>"``; on the first request for a key a new
    ``sst.Link`` with that name is created and stored.
    """
    key = "link.%s:%s" % (name1, name2)
    if key in links:
        return links[key]
    links[key] = sst.Link(key)
    return links[key]
for gid in system_gids: parent = system_parents[gid] operations[gid] = {} relations[gid] = {} if system_kinds[gid] in system_operations: operations[gid] = system_operations[system_kinds[gid]] if gid in system_relations: relations[gid] = system_relations[gid] for component in system_relations[gid]: component_id = system_relations[gid][component] if component_id != gid: link = sst.Link("link"+str(gid)+" "+str(component_id)) system_comps[gid].addLink(link, "Link "+str(component_id), "1ps") system_comps[component_id].addLink(link, "Link "+str(gid), "1ps") if gid in component_links: component_links[gid].append(component_id) else: component_links.update({gid: [component_id]}) if component_id in other_links: other_links[component_id].append(gid) else: other_links.update({component_id: [gid]}) for edge in system_edges[gid]: if edge: ic_gid = edge[-1] link = sst.Link("link"+str(gid)+" "+str(ic_gid)) system_comps[gid].addLink(link, "Link "+str(ic_gid), "1ps") system_comps[ic_gid].addLink(link, "Link "+str(gid), "1ps") if gid in component_links: component_links[gid].append(ic_gid)
"network_output_buffer_size" : "2KB", }) comp_memctrl1 = sst.Component("memory1", "memHierarchy.MemController") comp_memctrl1.addParams({ "debug" : """0""", "clock" : """1.6GHz""", }) comp_memory1 = comp_memctrl1.setSubComponent("backend", "memHierarchy.simpleMem") comp_memory1.addParams({ "mem_size" : "512MiB", "access_time" : "5 ns" }) # Define the simulation links link_core0_dcache = sst.Link("link_core0_dcache") link_core0_dcache.connect( (ariel_cpus, "cache_link_0", "500ps"), (comp_c0_l1Dcache, "high_network_0", "500ps") ) link_core1_dcache = sst.Link("link_core1_dcache") link_core1_dcache.connect( (ariel_cpus, "cache_link_1", "500ps"), (comp_c1_l1Dcache, "high_network_0", "500ps") ) link_core2_dcache = sst.Link("link_core2_dcache") link_core2_dcache.connect( (ariel_cpus, "cache_link_2", "500ps"), (comp_c2_l1Dcache, "high_network_0", "500ps") ) link_core3_dcache = sst.Link("link_core3_dcache") link_core3_dcache.connect( (ariel_cpus, "cache_link_3", "500ps"), (comp_c3_l1Dcache, "high_network_0", "500ps") ) link_core4_dcache = sst.Link("link_core4_dcache") link_core4_dcache.connect( (ariel_cpus, "cache_link_4", "500ps"), (comp_c4_l1Dcache, "high_network_0", "500ps") ) link_core5_dcache = sst.Link("link_core5_dcache") link_core5_dcache.connect( (ariel_cpus, "cache_link_5", "500ps"), (comp_c5_l1Dcache, "high_network_0", "500ps") ) link_core6_dcache = sst.Link("link_core6_dcache") link_core6_dcache.connect( (ariel_cpus, "cache_link_6", "500ps"), (comp_c6_l1Dcache, "high_network_0", "500ps") ) link_core7_dcache = sst.Link("link_core7_dcache") link_core7_dcache.connect( (ariel_cpus, "cache_link_7", "500ps"), (comp_c7_l1Dcache, "high_network_0", "500ps") )
# Shared L3 cache parameters.
l3.addParams({
    "cache_frequency": cacheFrequency,
    # Size scales with the core count (factor 1 per core). The product is
    # parenthesised because % and * share precedence and associate left to
    # right: without the parentheses the factor would repeat the formatted
    # string instead of scaling the number.
    "cache_size": "%d MB" % (corecount * 1),
    "cache_line_size": cacheLineSize,
    "associativity": "8",
    "access_latency_cycles": "20",
    "coherence_protocol": coherenceProtocol,
    "replacement_policy": rplPolicy,
    "L1": "0",
    "debug": memDebug,
    "debug_level": memDebugLevel,
    "mshr_num_entries": "16",
})

# Bus to L3 and L3 <-> MM
BusL3Link = sst.Link("bus_L3")
BusL3Link.connect((membus, "low_network_0", busLat),
                  (l3, "high_network_0", busLat))
L3MemCtrlLink = sst.Link("L3MemCtrl")
L3MemCtrlLink.connect((l3, "low_network_0", busLat),
                      (memory, "direct_link", busLat))

# txn gen --> memHierarchy Bridge
comp_memhBridge = sst.Component("memh_bridge", "CramSim.c_MemhBridge")
comp_memhBridge.addParams(g_params)
comp_memhBridge.addParams({
    "verbose": verbose,
    "numTxnPerCycle": g_params["numChannels"],
})

# controller
# link = sst.Link("s%d_dss%d_core%d:l1" % (next_slice, next_dss, next_core)) # link.connect((cpu[core_id], "mem_link", params.link_latency), (l1, "high_network_0", params.link_latency)) # link = sst.Link("s%d_dss%d_core%d_l1:bus" % (next_slice, next_dss, next_core)) # link.connect((l1, "low_network_0", params.link_latency), (bus, "high_network_%d" % next_core, params.link_latency)) ################################################# l1i = sst.Component("s%d_dss%d_core%d_l1icache" % (next_slice, next_dss, next_core), "memHierarchy.Cache") l1i.addParams(params.l1_params) l1d = sst.Component("s%d_dss%d_core%d_l1dcache" % (next_slice, next_dss, next_core), "memHierarchy.Cache") l1d.addParams(params.l1_params) link = sst.Link("s%d_dss%d_core%d:l1i" % (next_slice, next_dss, next_core)) link.connect((gpu, "core%d-icache" % core_id, params.link_latency), (l1i, "high_network_0", params.link_latency)) link = sst.Link("s%d_dss%d_core%d:l1d" % (next_slice, next_dss, next_core)) link.connect((gpu, "core%d-dcache" % core_id, params.link_latency), (l1d, "high_network_0", params.link_latency)) link = sst.Link("s%d_dss%d_core%d_l1i:bus" % (next_slice, next_dss, next_core)) link.connect((l1i, "low_network_0", params.link_latency), (bus, "high_network_%d" % (2*next_core+0), params.link_latency)) link = sst.Link("s%d_dss%d_core%d_l1d:bus" % (next_slice, next_dss, next_core)) link.connect((l1d, "low_network_0", params.link_latency), (bus, "high_network_%d" % (2*next_core+1), params.link_latency)) ################################################# link = sst.Link("s%d_dss%d_bus:l2" % (next_slice, next_dss)) link.connect((bus, "low_network_0", params.link_latency), (l2, "high_network_0", params.link_latency))
# Legacy sender in slot 0 of sub0_1: sendCount is set to 15 and all of its
# statistics are enabled.
sub0_1_0 = sub0_1.setSubComponent("mySubCompSlot","simpleElementExample.SubCompSenderLegacy",0);
sub0_1_0.addParam("sendCount", 15)
sub0_1_0.enableAllStatistics()

# Set up receivers using slots and user subcomponent
loader1 = sst.Component("Loader1", "simpleElementExample.SubComponentLoaderLegacy")
loader1.addParam("clock", "1.0GHz")

# Two slots on loader1, each holding one legacy receiver with statistics enabled.
sub1_0 = loader1.setSubComponent("mySubComp", "simpleElementExample.SubCompSlotLegacy",0)
sub1_0_0 = sub1_0.setSubComponent("mySubCompSlot", "simpleElementExample.SubCompReceiverLegacy",0)
sub1_0_0.enableAllStatistics()

sub1_1 = loader1.setSubComponent("mySubComp", "simpleElementExample.SubCompSlotLegacy",1)
sub1_1_0 = sub1_1.setSubComponent("mySubCompSlot", "simpleElementExample.SubCompReceiverLegacy",0)
sub1_1_0.enableAllStatistics()

# Set up links
# Each sender's "sendPort" goes to the "recvPort" of the receiver with the
# same slot index, with 5ns latency on both ends.
link0 = sst.Link("myLink0")
link0.connect((sub0_0_0, "sendPort", "5ns"), (sub1_0_0, "recvPort", "5ns"))
link1 = sst.Link("myLink1")
link1.connect((sub0_1_0, "sendPort", "5ns"), (sub1_1_0, "recvPort", "5ns"))

sst.setStatisticLoadLevel(1)
"clock": "1GHz", "backend.mem_size": "512MiB", "memNIC.network_bw": "50GB/s", "memNIC.addr_range_start": 128, "memNIC.interleave_size": "128B", "memNIC.interleave_step": "256B" }) # Enable statistics sst.setStatisticLoadLevel(7) sst.setStatisticOutput("sst.statOutputConsole") sst.enableAllStatisticsForComponentType("memHierarchy.Scratchpad") sst.enableAllStatisticsForComponentType("memHierarchy.MemController") # Define the simulation links link_cpu0_scratch0 = sst.Link("link_cpu0_scratch0") link_cpu0_scratch0.connect((comp_cpu0, "mem_link", "1000ps"), (comp_scratch0, "cpu", "1000ps")) link_cpu0_scratch1 = sst.Link("link_cpu1_scratch1") link_cpu0_scratch1.connect((comp_cpu1, "mem_link", "1000ps"), (comp_scratch1, "cpu", "1000ps")) link_scratch0_net = sst.Link("link_scratch0_net") link_scratch0_net.connect((comp_scratch0, "network", "100ps"), (comp_net, "port0", "100ps")) link_scratch1_net = sst.Link("link_scratch1_net") link_scratch1_net.connect((comp_scratch1, "network", "100ps"), (comp_net, "port1", "100ps")) link_mem0_net = sst.Link("link_mem0_net") link_mem0_net.connect((comp_memory0, "network", "100ps"), (comp_net, "port2", "100ps")) link_mem1_net = sst.Link("link_mem1_net")
for next_ring_stop in range( (cores_per_group + memory_controllers_per_group + l3cache_blocks_per_group) * groups): ring_rtr = sst.Component("rtr." + str(next_ring_stop), "merlin.hr_router") ring_rtr.addParams(ringstop_params) ring_rtr.addParams({"id": next_ring_stop}) topo = ring_rtr.setSubComponent("topology", "merlin.torus") topo.addParams(topology_params) router_map["rtr." + str(next_ring_stop)] = ring_rtr for next_ring_stop in range( (cores_per_group + memory_controllers_per_group + l3cache_blocks_per_group) * groups): if next_ring_stop == 0: rtr_link_positive = sst.Link("rtr_pos_" + str(next_ring_stop)) rtr_link_positive.connect((router_map["rtr.0"], "port0", ring_latency), (router_map["rtr.1"], "port1", ring_latency)) rtr_link_negative = sst.Link("rtr_neg_" + str(next_ring_stop)) rtr_link_negative.connect( (router_map["rtr.0"], "port1", ring_latency), (router_map["rtr." + str(((cores_per_group + memory_controllers_per_group + l3cache_blocks_per_group) * groups) - 1)], "port0", ring_latency)) elif next_ring_stop == ((cores_per_group + memory_controllers_per_group + l3cache_blocks_per_group) * groups) - 1: rtr_link_positive = sst.Link("rtr_pos_" + str(next_ring_stop)) rtr_link_positive.connect( (router_map["rtr." + str(next_ring_stop)], "port0", ring_latency), (router_map["rtr.0"], "port1", ring_latency))
# link = sst.Link("s%d_dss%d_core%d_l1:bus" % (next_slice, next_dss, next_core)) # link.connect((l1, "low_network_0", params.link_latency), (bus, "high_network_%d" % next_core, params.link_latency)) ################################################# l1i = sst.Component( "s%d_dss%d_core%d_l1icache" % (next_slice, next_dss, next_core), "memHierarchy.Cache") l1i.addParams(params.l1_params) l1d = sst.Component( "s%d_dss%d_core%d_l1dcache" % (next_slice, next_dss, next_core), "memHierarchy.Cache") l1d.addParams(params.l1_params) link = sst.Link("s%d_dss%d_core%d:l1i" % (next_slice, next_dss, next_core)) link.connect((gpu, "core%d-icache" % core_id, params.link_latency), (l1i, "high_network_0", params.link_latency)) link = sst.Link("s%d_dss%d_core%d:l1d" % (next_slice, next_dss, next_core)) link.connect((gpu, "core%d-dcache" % core_id, params.link_latency), (l1d, "high_network_0", params.link_latency)) link = sst.Link("s%d_dss%d_core%d_l1i:bus" % (next_slice, next_dss, next_core)) link.connect((l1i, "low_network_0", params.link_latency), (bus, "high_network_%d" % (2 * next_core + 0), params.link_latency)) link = sst.Link("s%d_dss%d_core%d_l1d:bus" %
"group": 1, "network_input_buffer_size": "2KB", "network_output_buffer_size": "2KB", "network_bw": "25GB/s", }) comp_memctrl1 = sst.Component("memory1", "memHierarchy.MemController") comp_memctrl1.addParams({ "debug": """0""", "clock": """1.6GHz""", }) comp_memory1 = comp_memctrl1.setSubComponent("backend", "memHierarchy.simpleMem") comp_memory1.addParams({"access_time": "25ns", "mem_size": "512MiB"}) # Define the simulation links link_core0_dcache = sst.Link("link_core0_dcache") link_core0_dcache.connect((ariel_cpus, "cache_link_0", "500ps"), (comp_c0_l1Dcache, "high_network_0", "500ps")) link_core1_dcache = sst.Link("link_core1_dcache") link_core1_dcache.connect((ariel_cpus, "cache_link_1", "500ps"), (comp_c1_l1Dcache, "high_network_0", "500ps")) link_core2_dcache = sst.Link("link_core2_dcache") link_core2_dcache.connect((ariel_cpus, "cache_link_2", "500ps"), (comp_c2_l1Dcache, "high_network_0", "500ps")) link_core3_dcache = sst.Link("link_core3_dcache") link_core3_dcache.connect((ariel_cpus, "cache_link_3", "500ps"), (comp_c3_l1Dcache, "high_network_0", "500ps")) link_core4_dcache = sst.Link("link_core4_dcache") link_core4_dcache.connect((ariel_cpus, "cache_link_4", "500ps"), (comp_c4_l1Dcache, "high_network_0", "500ps")) link_core5_dcache = sst.Link("link_core5_dcache")
def getLink(leftName, rightName, num):
    """Return the link registered for (leftName, rightName, num).

    The link is looked up in the module-level ``links`` mapping under the
    key ``"link.<leftName>:<rightName>:<num>"`` (``num`` is formatted with
    ``%d``); on the first request for a key a new ``sst.Link`` with that
    name is created and stored.
    """
    key = "link.%s:%s:%d" % (leftName, rightName, num)
    if key in links:
        return links[key]
    links[key] = sst.Link(key)
    return links[key]
kRtrFwd[-1].addParams(ctrl_network_params) kRtrData.append(sst.Component("krtr_data_" + str(nodeNum), "kingsley.noc_mesh")) kRtrData[-1].addParams(data_network_params) kRtrReq[-1].addParams({"local_ports" : 2}) kRtrAck[-1].addParams({"local_ports" : 2}) kRtrFwd[-1].addParams({"local_ports" : 2}) kRtrData[-1].addParams({"local_ports" : 2}) i = 0 for y in range(0, mesh_stops_y): for x in range (0, mesh_stops_x): # North-south connections if y != (mesh_stops_y -1): kRtrReqNS = sst.Link("krtr_req_ns_" + str(i)) kRtrReqNS.connect( (kRtrReq[i], "south", mesh_link_latency), (kRtrReq[i + mesh_stops_x], "north", mesh_link_latency) ) kRtrAckNS = sst.Link("krtr_ack_ns_" + str(i)) kRtrAckNS.connect( (kRtrAck[i], "south", mesh_link_latency), (kRtrAck[i + mesh_stops_x], "north", mesh_link_latency) ) kRtrFwdNS = sst.Link("krtr_fwd_ns_" + str(i)) kRtrFwdNS.connect( (kRtrFwd[i], "south", mesh_link_latency), (kRtrFwd[i + mesh_stops_x], "north", mesh_link_latency) ) kRtrDataNS = sst.Link("krtr_data_ns_" + str(i)) kRtrDataNS.connect( (kRtrData[i], "south", mesh_link_latency), (kRtrData[i + mesh_stops_x], "north", mesh_link_latency) ) if x != (mesh_stops_x - 1): kRtrReqEW = sst.Link("krtr_req_ew_" + str(i)) kRtrReqEW.connect( (kRtrReq[i], "east", mesh_link_latency), (kRtrReq[i+1], "west", mesh_link_latency) ) kRtrAckEW = sst.Link("krtr_ack_ew_" + str(i)) kRtrAckEW.connect( (kRtrAck[i], "east", mesh_link_latency), (kRtrAck[i+1], "west", mesh_link_latency) ) kRtrFwdEW = sst.Link("krtr_fwd_ew_" + str(i)) kRtrFwdEW.connect( (kRtrFwd[i], "east", mesh_link_latency), (kRtrFwd[i+1], "west", mesh_link_latency) )
# L1 cache configuration: 64 KB, 8-way, 64-byte lines, MSI coherence,
# LRU replacement, marked as an L1 ("L1": "1") with debug output off.
l1cache.addParams({
    "cache_frequency": "2 Ghz",
    "cache_size": "64 KB",
    "coherence_protocol": "MSI",
    "replacement_policy": "lru",
    "associativity": "8",
    "access_latency_cycles": "1",
    "cache_line_size": "64",
    "L1": "1",
    "debug": "0",
})

# Memory controller sitting directly below the L1.
# NOTE(review): "use_dramsim" is "0" while the DRAMSim ini files are still
# listed -- presumably they are ignored in that mode; confirm.
memory = sst.Component("memory", "memHierarchy.MemController")
memory.addParams({
    "coherence_protocol": "MSI",
    "access_time": "10ns",
    "backend.mem_size": "2048MiB",
    "clock": "1GHz",
    "use_dramsim": "0",
    "device_ini": "DDR3_micron_32M_8B_x4_sg125.ini",
    "system_ini": "system.ini"
})

# ariel core 0 -> L1 cache
cpu_cache_link = sst.Link("cpu_cache_link")
cpu_cache_link.connect((ariel, "cache_link_0", "50ps"), (l1cache, "high_network_0", "50ps"))

# L1 cache -> memory controller (the link is named "mem_bus_link" although
# no bus component is involved in this connection).
memory_link = sst.Link("mem_bus_link")
memory_link.connect((l1cache, "low_network_0", "50ps"), (memory, "direct_link", "50ps"))
def _wire_node_links(routers, rtr_port, link_suffix, endpoints):
    """Link one port on each of the four mesh routers to its endpoint.

    routers     -- (req, ack, fwd, data) router components, in that order
    rtr_port    -- port name used on every router (e.g. "local0", "north")
    link_suffix -- text appended to each per-network link-name prefix
    endpoints   -- the four connect() endpoints produced by a builder, in
                   (req, ack, fwd, data) order
    """
    # Unpack explicitly so that a builder returning anything other than four
    # endpoints raises instead of being silently truncated by zip().
    req, ack, fwd, data = endpoints

    # The data network's link names are spelled "kRtr_" (capital R) while the
    # other three use "krtr_"; kept exactly so existing link names are stable.
    prefixes = ("krtr_req_", "krtr_ack_", "krtr_fwd_", "kRtr_data_")

    for prefix, rtr, endpoint in zip(prefixes, routers, (req, ack, fwd, data)):
        link = sst.Link(prefix + link_suffix)
        link.connect((rtr, rtr_port, mesh_link_latency), endpoint)


def setNodeDist(nodeId, rtrreq, rtrack, rtrfwd, rtrdata):
    """Attach memory, tile and directory endpoints to one mesh stop.

    nodeId  -- index of the mesh stop; used in link names and passed to the
               builders
    rtrreq, rtrack, rtrfwd, rtrdata
            -- the request / ack / forward / data routers of this stop

    Wiring performed, in this order:
      * nodes 1, 3, 5 and 7 get a memBuilder endpoint on their north, west,
        east and south router port respectively;
      * every node gets a tileBuilder endpoint on "local0";
      * every node except 0 gets a DCBuilder endpoint on "local1".
    All links use mesh_link_latency on the router side.
    """
    routers = (rtrreq, rtrack, rtrfwd, rtrdata)
    node_tag = str(nodeId)

    # Mesh stops that host a memory, and the router port it hangs off.
    mem_ports = {1: "north", 3: "west", 5: "east", 7: "south"}
    if nodeId in mem_ports:
        mem_endpoints = memBuilder.build(nodeId)
        port = mem_ports[nodeId]
        _wire_node_links(routers, port, port + "_" + node_tag, mem_endpoints)

    # Place tiles on all routers
    _wire_node_links(routers, "local0", "port0_" + node_tag,
                     tileBuilder.build(nodeId))

    # Place DC at every tile except 0
    if nodeId != 0:
        _wire_node_links(routers, "local1", "port1_" + node_tag,
                         DCBuilder.build(nodeId))
# Page manager: pool 0 holds num_pages pages of pageSize * 1024 each
# (presumably pageSize is in KiB and this converts to bytes -- TODO confirm).
pagemgr.addParams({
    "verbose": 1,
    "pagecount0": num_pages,
    "pagesize0": pageSize * 1024,
})

# Sieve component: 8MB, associativity 16, line size 64; its output file is
# set to mallocRank.txt.
sieveId = sst.Component("sieve", "memHierarchy.Sieve")
sieveId.addParams({
    "cache_size": "8MB",
    "associativity": 16,
    "cache_line_size": 64,
    "output_file": "mallocRank.txt",
})

# Per core: one link from Ariel's cache port and one from the memory
# manager's allocation port, both terminating on the sieve.
for x in range(corecount):
    arielL1Link = sst.Link("cpu_cache_link_%d" % x)
    arielL1Link.connect(
        (ariel, "cache_link_%d" % x, busLat),
        (sieveId, "cpu_link_%d" % x, busLat),
    )

    arielALink = sst.Link("cpu_alloc_link_%d" % x)
    arielALink.connect(
        (memmgr, "alloc_link_%d" % x, busLat),
        (sieveId, "alloc_link_%d" % x, busLat),
    )

# Available statistics sinks, keyed by a small integer selector.
statoutputs = {
    1: "sst.statOutputConsole",
    2: "sst.statOutputCSV",
    3: "sst.statOutputTXT",
}

sst.setStatisticLoadLevel(7)
sst.setStatisticOutput(statoutputs[2])  # selector 2 -> CSV output
sst.enableAllStatisticsForAllComponents()

print("done configuring SST")
"coherence_protocol": "MSI", "debug": "0", "backend.access_time": "100 ns", "clock": "1GHz", "backend.mem_size": "512MiB" }) # Enable statistics sst.setStatisticLoadLevel(7) sst.setStatisticOutput("sst.statOutputConsole") sst.enableAllStatisticsForComponentType("memHierarchy.Cache") sst.enableAllStatisticsForComponentType("memHierarchy.DirectoryController") sst.enableAllStatisticsForComponentType("memHierarchy.MemController") # Define the simulation links link_c0l1cache_link = sst.Link("link_c0l1cache_link") link_c0l1cache_link.connect((comp_cpu0, "mem_link", "1000ps"), (comp_c0_l1cache, "high_network_0", "1000ps")) link_c0l1cache_bus = sst.Link("link_c0l1cache_bus") link_c0l1cache_bus.connect((comp_c0_l1cache, "low_network_0", "10000ps"), (comp_n0_bus, "high_network_0", "10000ps")) link_c1l1cache_link = sst.Link("link_c1l1cache_link") link_c1l1cache_link.connect((comp_cpu1, "mem_link", "1000ps"), (comp_c1_l1cache, "high_network_0", "1000ps")) link_c1l1cache_bus = sst.Link("link_c1l1cache_bus") link_c1l1cache_bus.connect((comp_c1_l1cache, "low_network_0", "10000ps"), (comp_n0_bus, "high_network_1", "10000ps")) link_c2l1cache_link = sst.Link("link_c2l1cache_link") link_c2l1cache_link.connect((comp_cpu2, "mem_link", "1000ps"), (comp_c2_l1cache, "high_network_0", "1000ps")) link_c2l1cache_bus = sst.Link("link_c2l1cache_bus")
"max_requests_per_cycle" : 1, "mshr_num_entries" : 8, "request_link_width" : "128B", "response_link_width" : "128B", "min_packet_size" : "10B", # control message size # Prefetch parameters "prefetcher" : "cassini.NextBlockPrefetcher", "drop_prefetch_mshr_level" : 5, # Drop prefetch when total misses > 5 # MemNIC parameters "memNIC.network_bw" : network_bw, "memNIC.network_address" : x, "memNIC.network_input_buffer_size" : "2KiB", "memNIC.network_output_buffer_size" : "2KiB", }) cpu_l1_link = sst.Link("link_cpu_cache_" + str(x)) cpu_l1_link.connect ( (comp_cpu, "mem_link", "500ps"), (comp_l1cache, "high_network_0", "500ps") ) l1_l2_link = sst.Link("link_l1_l2_" + str(x)) l1_l2_link.connect( (comp_l1cache, "low_network_0", "100ps"), (comp_l2cache, "high_network_0", "100ps") ) l2_network_link = sst.Link("link_l2_network_" + str(x)) l2_network_link.connect( (comp_l2cache, "cache", "100ps"), (comp_network, "port" + str(x), "100ps") ) for x in range(caches): comp_l3cache = sst.Component("l3cache" + str(x), "memHierarchy.Cache") comp_l3cache.addParams({ "cache_frequency" : uncoreclock, "access_latency_cycles" : 14, "tag_access_latency_cycles" : 6, "mshr_latency_cycles" : 12,
# L1 cache: 64 KB, associativity 8, LRU replacement, MESI coherence,
# line size 64, one-cycle access latency, clocked at 2 GHz.
comp_l1cache = sst.Component("l1cache", "memHierarchy.Cache")
comp_l1cache.addParams({
    "access_latency_cycles": "1",
    "cache_frequency": "2 Ghz",
    "replacement_policy": "lru",
    "coherence_protocol": "MESI",
    "associativity": "8",
    "cache_line_size": "64",
    "L1": "1",
    "cache_size": "64 KB",
})

# Memory controller using the memHierarchy.dramsim backend, configured from
# the two .ini files named below.
# NOTE(review): "backend.mem_size" is "512" with no unit suffix -- confirm
# the SST version in use accepts a bare number here.
comp_memory = sst.Component("memory", "memHierarchy.MemController")
comp_memory.addParams({
    "coherence_protocol": "MESI",
    "clock": "1GHz",
    "backend.access_time": "1000 ns",
    "backend.device_ini": "DDR3_micron_32M_8B_x4_sg125.ini",
    "backend.system_ini": "system.ini",
    "backend.mem_size": "512",
    "backend": "memHierarchy.dramsim",
})

# Define the simulation links
# CPU <-> upper side of the L1, 1000 ps in each direction.
link_cpu_cache_link = sst.Link("link_cpu_cache_link")
link_cpu_cache_link.connect(
    (comp_cpu, "cache_link", "1000ps"),
    (comp_l1cache, "high_network_0", "1000ps"),
)

# Lower side of the L1 <-> memory controller, 50 ps in each direction.
link_mem_bus_link = sst.Link("link_mem_bus_link")
link_mem_bus_link.connect(
    (comp_l1cache, "low_network_0", "50ps"),
    (comp_memory, "direct_link", "50ps"),
)
# End of generated output.
}) chiprtr.setSubComponent("topology", "merlin.singlerouter") # Enable statistics sst.setStatisticLoadLevel(7) sst.setStatisticOutput("sst.statOutputConsole") for a in componentlist: sst.enableAllStatisticsForComponentType(a) # Define the simulation links # cpu/l1/cpu_l1_nic # | # mmio/l1/mmio_l1_nic - chiprtr - dir_nic/dir/mem # # Connect CPU to CPU L1 via the CPU's interface and the L1's cpulink handler link_cpu_l1 = sst.Link("link_cpu") link_cpu_l1.connect((cpu_link, "port", "1000ps"), (cpu_l1_link, "port", "1000ps")) # Connect the CPU L1 to the network via the L1's memlink NIC handler link_core_rtr = sst.Link("link_core") link_core_rtr.connect((cpu_l1_nic, "port", '1000ps'), (chiprtr, "port0", "1000ps")) # Connect MMIO to MMIO L1 via the MMIO's interface and the L1's cpulink handler link_mmio_l1 = sst.Link("link_mmio") link_mmio_l1.connect((mmio_link, "port", "500ps"), (mmio_l1_link, "port", "500ps")) # Connect the MMIO L1 to the network via the L1's memlink NIC handler link_device_rtr = sst.Link("link_device")