def genMemHierarchy(cores):
    """Build the cache hierarchy, memory controller and CramSim back end.

    For every core a private L1 and a private L2 cache are created and chained
    Ariel -> L1 -> L2 -> shared bus. A single L3 sits between the bus and the
    memory controller, and the memory controller's "cube_link" port is wired
    to the CramSim bridge / controller / DIMM chain.

    The function reads module-level configuration defined outside this block
    (ariel, busLat, cacheFrequency, cacheLineSize, coherenceProtocol,
    rplPolicy, memDebug, memDebugLevel, g_params).

    cores -- number of cores to create private L1/L2 caches for.
    """

    def cacheParams(size, latency, isL1, mshrs=None):
        # Settings common to all three cache levels; only the size, latency,
        # L1 flag and (optional) MSHR count differ between levels.
        params = {
            "cache_frequency": cacheFrequency,
            "cache_size": size,
            "cache_line_size": cacheLineSize,
            "associativity": "8",
            "access_latency_cycles": latency,
            "coherence_protocol": coherenceProtocol,
            "replacement_policy": rplPolicy,
            "L1": isL1,
            "debug": memDebug,
            "debug_level": memDebugLevel,
        }
        if mshrs is not None:
            params["mshr_num_entries"] = mshrs
        return params

    def wire(name, endA, endB):
        # Create a named link and attach both of its endpoints.
        link = sst.Link(name)
        link.connect(endA, endB)

    membus = sst.Component("membus", "memHierarchy.Bus")
    membus.addParams({"bus_frequency": cacheFrequency})

    memCtrlParams = {
        "range_start": "0",
        "coherence_protocol": coherenceProtocol,
        "debug": memDebug,
        "clock": "1Ghz",
        "backend": "memHierarchy.cramsim",
        "backend.access_time": "2 ns",  # Phy latency
        "backend.mem_size": "512MiB",
        "backend.max_outstanding_requests": 256,
        "backend.verbose": 1,
        "request_width": cacheLineSize,
    }
    memory = sst.Component("memory", "memHierarchy.MemController")
    memory.addParams(memCtrlParams)

    # The per-core cache settings do not depend on the core index.
    l1Params = cacheParams("32KB", "4", "1")
    l2Params = cacheParams("256 KB", "10", "0", mshrs="16")

    for core in range(cores):
        l1 = sst.Component("l1cache_%d" % core, "memHierarchy.Cache")
        l1.addParams(l1Params)

        l2 = sst.Component("l2cache_%d" % core, "memHierarchy.Cache")
        l2.addParams(l2Params)

        ## SST Links
        # Ariel -> L1 (private) -> L2 (private) -> L3 (shared) -> DRAM
        wire("cpu_cache_%d" % core,
             (ariel, "cache_link_%d" % core, busLat),
             (l1, "high_network_0", busLat))
        wire("l1_l2_%d" % core,
             (l1, "low_network_0", busLat),
             (l2, "high_network_0", busLat))
        wire("l2_membus_%d" % core,
             (l2, "low_network_0", busLat),
             (membus, "high_network_%d" % core, busLat))

    l3 = sst.Component("L3cache", "memHierarchy.Cache")
    l3.addParams(cacheParams("8 MB", "20", "0", mshrs="16"))

    # Bus to L3 and L3 <-> main memory
    wire("bus_L3",
         (membus, "low_network_0", busLat),
         (l3, "high_network_0", busLat))
    wire("L3MemCtrl",
         (l3, "low_network_0", busLat),
         (memory, "direct_link", busLat))

    # txn gen --> memHierarchy bridge; g_params is applied first so the
    # explicit settings below take precedence on any shared key.
    comp_memhBridge = sst.Component("memh_bridge", "CramSim.c_MemhBridge")
    comp_memhBridge.addParams(g_params)
    comp_memhBridge.addParams({
        "verbose": "0",
        "numTxnPerCycle": g_params["numChannels"],
        "strTxnTraceFile": "arielTrace",
        "boolPrintTxnTrace": "1",
    })

    # controller
    comp_controller0 = sst.Component("MemController0", "CramSim.c_Controller")
    comp_controller0.addParams(g_params)
    comp_controller0.addParams({
        "verbose": "0",
        "TxnConverter": "CramSim.c_TxnConverter",
        "AddrMapper": "CramSim.c_AddressHasher",
        "CmdScheduler": "CramSim.c_CmdScheduler",
        "DeviceController": "CramSim.c_DeviceController",
    })

    # bank receiver
    comp_dimm0 = sst.Component("Dimm0", "CramSim.c_Dimm")
    comp_dimm0.addParams(g_params)

    cramsimCycle = g_params["clockCycle"]

    # memHierarchy memory controller <-> CramSim bridge
    wire("link_dir_cramsim_link",
         (memory, "cube_link", "2ns"),
         (comp_memhBridge, "cpuLink", "2ns"))
    # memhBridge (= TxnGen) <-> memory controller
    wire("memHLink_1",
         (comp_memhBridge, "memLink", cramsimCycle),
         (comp_controller0, "txngenLink", cramsimCycle))
    # Controller <-> Dimm
    wire("cmdLink_1",
         (comp_controller0, "memLink", cramsimCycle),
         (comp_dimm0, "ctrlLink", cramsimCycle))

    for component in (comp_controller0, comp_memhBridge, comp_dimm0):
        component.enableAllStatistics()
import sst # Define SST core options sst.setProgramOption("timebase", "1ps") sst.setProgramOption("stopAtCycle", "0 ns") # Tell SST what statistics handling we want sst.setStatisticLoadLevel(4) memory_mb = 128 # Define the simulation components comp_cpu = sst.Component("cpu", "miranda.BaseCPU") comp_cpu.addParams({ "verbose": 0, "generator": "miranda.GUPSGenerator", "generatorParams.verbose": 0, "generatorParams.count": 10000, "generatorParams.max_address": ((memory_mb) / 2) * 1024 * 1024, }) # Enable statistics outputs comp_cpu.enableAllStatistics({"type": "sst.AccumulatorStatistic"}) comp_l1cache = sst.Component("l1cache", "memHierarchy.Cache") comp_l1cache.addParams({ "access_latency_cycles": "2",
def build(self, nodeID):
    """Build one two-core tile and return its four mesh-router endpoints.

    The tile consists of an L2 cache (with a MemLink towards the cores and a
    MemNICFour towards the mesh), a bus, and two L1 caches each driven by a
    miranda.BaseCPU running a STREAMBenchGenerator. Tile and core indices are
    taken from, and advanced on, self.next_tile_id and self.next_core_id.

    Relies on module-level configuration defined outside this block
    (l2_cache_params, l2_prefetch_params, nic_params, data_nic_params,
    ctrl_nic_params, core_clock, mesh_link_latency, l1_cache_params,
    core_params, gen_params, thread_iters, quiet).

    nodeID -- accepted for interface compatibility; not used by this method.

    Returns a 4-tuple of (subcomponent, "rtr_port", mesh_link_latency)
    endpoints in the order req, ack, fwd, data.
    """
    # Capture this tile's index before the counter is advanced so that the
    # component names and the progress messages all report the same tile.
    # (Previously the messages were printed after the increment and therefore
    # named tile N+1 for the components called l2cache_N / l2cachebus_N.)
    tile_id = self.next_tile_id

    tileL2cache = sst.Component("l2cache_" + str(tile_id), "memHierarchy.Cache")
    tileL2cache.addParams(l2_cache_params)
    tileL2cache.addParams(l2_prefetch_params)

    # Define L2 NIC
    l2clink = tileL2cache.setSubComponent("cpulink", "memHierarchy.MemLink")
    l2mlink = tileL2cache.setSubComponent("memlink", "memHierarchy.MemNICFour")
    l2data = l2mlink.setSubComponent("data", "kingsley.linkcontrol")
    l2req = l2mlink.setSubComponent("req", "kingsley.linkcontrol")
    l2fwd = l2mlink.setSubComponent("fwd", "kingsley.linkcontrol")
    l2ack = l2mlink.setSubComponent("ack", "kingsley.linkcontrol")
    l2mlink.addParams(nic_params)
    l2mlink.addParams({"group": 1})
    l2data.addParams(data_nic_params)
    l2req.addParams(ctrl_nic_params)
    l2fwd.addParams(ctrl_nic_params)
    l2ack.addParams(ctrl_nic_params)

    # Bus between L2 and L1s on the tile
    l2bus = sst.Component("l2cachebus_" + str(tile_id), "memHierarchy.Bus")
    l2bus.addParams({
        "bus_frequency": core_clock,
    })

    l2busLink = sst.Link("l2bus_link_" + str(tile_id))
    l2busLink.connect((l2bus, "low_network_0", mesh_link_latency),
                      (l2clink, "port", mesh_link_latency))
    l2busLink.setNoCut()

    self.next_tile_id = tile_id + 1

    # L1s
    # --- left core: attached to bus port high_network_0 ---
    tileLeftL1 = sst.Component("l1cache_" + str(self.next_core_id), "memHierarchy.Cache")
    tileLeftL1.addParams(l1_cache_params)

    if not quiet:
        print("Creating core " + str(self.next_core_id) + " on tile: " + str(tile_id) + "...")

    leftL1L2link = sst.Link("l1cache_link_" + str(self.next_core_id))
    leftL1L2link.connect((l2bus, "high_network_0", mesh_link_latency),
                         (tileLeftL1, "low_network_0", mesh_link_latency))
    leftL1L2link.setNoCut()

    leftCore = sst.Component("core_" + str(self.next_core_id), "miranda.BaseCPU")
    leftCore.addParams(core_params)
    leftGen = leftCore.setSubComponent("generator", "miranda.STREAMBenchGenerator")
    leftGen.addParams(gen_params)
    # Each core gets its own slice of the a/b/c arrays, offset by
    # core index * thread_iters * 8 (presumably 8 bytes per element -- confirm
    # against gen_params).
    leftGen.addParams({
        "start_a": self.base_a + self.next_core_id * thread_iters * 8,
        "start_b": self.base_b + self.next_core_id * thread_iters * 8,
        "start_c": self.base_c + self.next_core_id * thread_iters * 8,
    })

    leftCoreL1link = sst.Link("core_link_" + str(self.next_core_id))
    leftCoreL1link.connect((leftCore, "cache_link", mesh_link_latency),
                           (tileLeftL1, "high_network_0", mesh_link_latency))
    leftCoreL1link.setNoCut()

    self.next_core_id = self.next_core_id + 1

    # --- right core: attached to bus port high_network_1 ---
    tileRightL1 = sst.Component("l1cache_" + str(self.next_core_id), "memHierarchy.Cache")
    tileRightL1.addParams(l1_cache_params)

    rightCore = sst.Component("core_" + str(self.next_core_id), "miranda.BaseCPU")
    rightCore.addParams(core_params)
    rightGen = rightCore.setSubComponent("generator", "miranda.STREAMBenchGenerator")
    rightGen.addParams(gen_params)
    rightGen.addParams({
        "start_a": self.base_a + self.next_core_id * thread_iters * 8,
        "start_b": self.base_b + self.next_core_id * thread_iters * 8,
        "start_c": self.base_c + self.next_core_id * thread_iters * 8,
    })

    rightCoreL1link = sst.Link("core_link_" + str(self.next_core_id))
    rightCoreL1link.connect((rightCore, "cache_link", mesh_link_latency),
                            (tileRightL1, "high_network_0", mesh_link_latency))
    rightCoreL1link.setNoCut()

    if not quiet:
        print("Creating core " + str(self.next_core_id) + " on tile: " + str(tile_id) + "...")

    rightL1L2link = sst.Link("l1cache_link_" + str(self.next_core_id))
    rightL1L2link.connect((l2bus, "high_network_1", mesh_link_latency),
                          (tileRightL1, "low_network_0", mesh_link_latency))
    rightL1L2link.setNoCut()

    self.next_core_id = self.next_core_id + 1

    return ((l2req, "rtr_port", mesh_link_latency),
            (l2ack, "rtr_port", mesh_link_latency),
            (l2fwd, "rtr_port", mesh_link_latency),
            (l2data, "rtr_port", mesh_link_latency))
# Automatically generated SST Python input
import sst

# Define SST core options
sst.setProgramOption("timebase", "1 ps")
sst.setProgramOption("stopAtCycle", "10000s")

# Define the simulation components
# A single clocker component driven by a 1MHz clock.
clocker0_params = {
    "clockcount": "100000000",
    "clock": "1MHz",
}
comp_clocker0 = sst.Component(
    "clocker0", "simpleElementExample.simpleClockerComponent")
comp_clocker0.addParams(clocker0_params)

# Define the simulation links
# (none: the clocker component has no links)
# End of generated output.
import sst # Define the simulation components # 4 cores with non-inclusive L1/L2 hierarchies # 2 inclusive L3s cores = 8 caches = 4 # Number of LLCs on the network memories = 2 coreclock = "2.4GHz" uncoreclock = "1.4GHz" coherence = "MESI" network_bw = "60GB/s" # Create merlin network - this is just simple single router comp_network = sst.Component("network", "merlin.hr_router") comp_network.addParams({ "xbar_bw" : network_bw, "link_bw" : network_bw, "input_buf_size" : "2KiB", "num_ports" : cores + caches + memories, "flit_size" : "36B", "output_buf_size" : "2KiB", "id" : "0", "topology" : "merlin.singlerouter" }) for x in range(cores): comp_cpu = sst.Component("cpu" + str(x), "memHierarchy.trivialCPU") comp_cpu.addParams({ "clock" : coreclock,
# Automatically generated SST Python input
import sst

# Define SST core options
sst.setProgramOption("timebase", "1 ps")
sst.setProgramOption("stopAtCycle", "100000s")

# Define the simulation components
# A single M5 system component configured from argvM5.xml.
system_params = {
    "debug": "0",
    "info": "yes",
    "configFile": "argvM5.xml",
    "M5debug": "none",
    "registerExit": "yes",
}
comp_system = sst.Component("system", "m5C.M5")
comp_system.addParams(system_params)

# Define the simulation links
# (none: the system component has no links)
# End of generated output.
import sst import os sst.setProgramOption("timebase", "1ps") sst_root = os.getenv( "SST_ROOT" ) l2PrefetchParams = { "prefetcher": "cassini.NextBlockPrefetcher" } ariel = sst.Component("a0", "ariel.ariel") ariel.addParams({ "verbose" : "0", "maxcorequeue" : "256", "maxissuepercycle" : "2", "pipetimeout" : "0", "executable" : sst_root + "/sst-elements/src/sst/elements/ariel/frontend/simple/examples/stream/stream", "arielmode" : "1", "memmgr.memorylevels" : "1", "memmgr.defaultlevel" : "0" }) corecount = 1; l1cache = sst.Component("l1cache", "memHierarchy.Cache") l1cache.addParams({ "cache_frequency" : "2 Ghz", "cache_size" : "64 KB", "coherence_protocol" : "MSI", "replacement_policy" : "lru",
# Automatically generated SST Python input import sst # Define SST core options sst.setProgramOption("timebase", "1ps") sst.setProgramOption("stopAtCycle", "300000ns") # Define the simulation components comp_cpu0 = sst.Component("cpu0", "memHierarchy.trivialCPU") comp_cpu0.addParams({ "memSize": "0x1000", "num_loadstore": "1000", "commFreq": "100", "do_write": "1" }) comp_c0_l1cache = sst.Component("c0.l1cache", "memHierarchy.Cache") comp_c0_l1cache.addParams({ "access_latency_cycles": "5", "cache_frequency": "2 Ghz", "replacement_policy": "lru", "coherence_protocol": "MSI", "associativity": "4", "cache_line_size": "64", "debug_level": "8", "L1": "1", "debug": "0", "cache_size": "4 KB" }) comp_cpu1 = sst.Component("cpu1", "memHierarchy.trivialCPU") comp_cpu1.addParams({ "memSize": "0x1000",
class RtrPorts:
    """Hands out consecutive router port numbers, starting at 0."""

    def __init__(self):
        # Next port number to hand out; also the count handed out so far.
        self._next_addr = 0

    def nextPort(self):
        """Return the next unused port number and advance the counter."""
        port = self._next_addr
        self._next_addr += 1
        return port

    def numPorts(self):
        """Return how many port numbers have been handed out so far."""
        return self._next_addr


rtrInfo = RtrPorts()

# Single merlin router; the variable is named memController in this script.
memControllerParams = {
    "id": 0,
    "topology": "merlin.singlerouter",
    "link_bw": "320GB/s",
    "xbar_bw": "512GB/s",
    "input_latency": "4ns",
    "output_latency": "4ns",
    "input_buf_size": "4KiB",
    "output_buf_size": "4KiB",
    "flit_size": "72B",
}
memController = sst.Component("memController", "merlin.hr_router")
memController.addParams(memControllerParams)

pagesize = 4096
memoryperlevel = 4096
else: print o assert False, "Unknown Options" print L1cachesz, L2cachesz, L1assoc, L2assoc, Replacp, L2MSHR, MSIMESI, Pref1, Pref2, Executable main() # Define needed environment params os.environ['OMP_NUM_THREADS'] = "8" # Define SST core options sst.setProgramOption("timebase", "1 ps") sst.setProgramOption("stopAtCycle", "100ms") # Define the simulation components ariel_cpus = sst.Component("cpus", "ariel.ariel") ariel_cpus.addParams({ "verbose": 0, "clock": "2 Ghz", "maxcorequeue": 256, "maxissuepercycle": 4, "pipetimeout": 0, "corecount": 8, "arielmode": 1, "memmgr.memorylevels": 1, "memmgr.pagecount0": 262144, "memmgr.defaultlevel": 0, "executable": Executable }) comp_c0_l1Dcache = sst.Component("c0.l1Dcache", "memHierarchy.Cache") comp_c0_l1Dcache.addParams({
apps = ["libquantum", "omnetpp", "soplex", "leslie3d"] elif benchmark == "mix2": apps = ["lbm", "astar", "GemsFDTD", "milc"] elif benchmark == "mix3": apps = ["soplex", "xalancbmk", "gcc", "h264ref"] elif benchmark == "mix4": apps = ["mcf", "sphinx3", "zeusmp", "omnetpp"] ## Application Info os.environ['SIM_DESC'] = 'EIGHT_CORES' os.environ['OMP_NUM_THREADS'] = str(corecount) sst_root = os.getenv("SST_ROOT") ## MemHierarchy membus = sst.Component("membus", "memHierarchy.Bus") membus.addParams({ "bus_frequency": cacheFrequency, }) memory = sst.Component("memory", "memHierarchy.MemController") memory.addParams({ "do_not_back": "1", "range_start": "0", "coherence_protocol": coherenceProtocol, "debug": memDebug, "clock": memoryFrequency, "backend": "memHierarchy.cramsim", "backend.access_time": "1 ns", # Phy latency "backend.mem_size": "%dGiB" % memory_size, "backend.max_outstanding_requests": 1024,
# Automatically generated SST Python input
import sst

# Define SST core options
sst.setProgramOption("timebase", "1 ps")
sst.setProgramOption("stopAtCycle", "10000s")

# Define the simulation components
# Both message generators use the same element type and settings.
msgGen_type = "simpleElementExample.simpleMessageGeneratorComponent"
msgGen_params = {
    "outputinfo": "0",
    "sendcount": "100000",
    "clock": "1MHz",
}

comp_msgGen0 = sst.Component("msgGen0", msgGen_type)
comp_msgGen0.addParams(msgGen_params)

comp_msgGen1 = sst.Component("msgGen1", msgGen_type)
comp_msgGen1.addParams(msgGen_params)

# Define the simulation links
# The two generators talk to each other over one link with identical
# latency on both ends.
link_latency = "1000000ps"
link_s_0_1 = sst.Link("link_s_0_1")
link_s_0_1.connect(
    (comp_msgGen0, "remoteComponent", link_latency),
    (comp_msgGen1, "remoteComponent", link_latency),
)
# End of generated output.
def doQuad(quad, cores, rtr, rtrPort, netAddr):
    """Build one quad: per-core L1 caches on a shared bus behind one L2.

    All components are created under the name prefix "q<quad>". Each core's
    L1 is fed either by an Ariel cache port or by a standalone streamCPU,
    depending on the module-level useAriel flag. The quad's L2 connects to the
    bus on one side and to the router on the other.

    Relies on module-level configuration defined outside this block (clock,
    l2clock, useAriel, ariel, coreCtr, cpuParams, coherence_protocol, memDebug,
    l1PrefetchParams, l2PrefetchParams, coreNetBW, busLat, netLat).

    quad    -- index of this quad; used for the name prefix and core numbering.
    cores   -- number of cores (and L1 caches) to create in this quad.
    rtr     -- router component the L2 attaches to.
    rtrPort -- router port number used for the L2's network link.
    netAddr -- network id, used only in the L2 component's name.
    """
    sst.pushNamePrefix("q%d" % quad)

    quadBus = sst.Component("membus", "memHierarchy.Bus")
    quadBus.addParams({
        "bus_frequency": clock,
        "bus_latency_cycles": 1,
    })

    # L1 settings are identical for every core in the quad.
    l1Params = {
        "coherence_protocol": coherence_protocol,
        "cache_frequency": clock,
        "replacement_policy": "lru",
        "cache_size": "8KB",
        "associativity": 8,
        "cache_line_size": 64,
        "access_latency_cycles": 2,
        "L1": 1,
        "debug": memDebug,
        "debug_level": 6,
    }

    for slot in range(cores):
        # Global core number; the stride of 4 is hard-coded here.
        coreNum = 4 * quad + slot

        # A standalone CPU is only created when Ariel is not in use.
        if (useAriel == 0):
            cpu = sst.Component("cpu_%d" % coreNum, "memHierarchy.streamCPU")
            cpu.addParams(cpuParams)

        l1 = sst.Component("l1cache_%d" % coreNum, "memHierarchy.Cache")
        l1.addParams(l1Params)
        l1.addParams(l1PrefetchParams)

        # Connect the L1 to whichever front end drives it.
        frontLink = sst.Link("core_cache_link_%d" % coreNum)
        l1Upper = (l1, "high_network_0", busLat)
        if useAriel:
            arielPort = "cache_link_" + str(coreCtr.nextPort())
            frontLink.connect((ariel, arielPort, busLat), l1Upper)
        else:
            frontLink.connect((cpu, "mem_link", busLat), l1Upper)

        # Connect the L1 to this core's port on the quad bus.
        busLink = sst.Link("cache_bus_link_%d" % coreNum)
        busLink.connect((l1, "low_network_0", busLat),
                        (quadBus, "high_network_%d" % slot, busLat))

    # One L2 for the whole quad cluster.
    l2 = sst.Component("l2cache_nid%d" % netAddr, "memHierarchy.Cache")
    l2.addParams({
        "coherence_protocol": coherence_protocol,
        "cache_frequency": l2clock,
        "replacement_policy": "lru",
        "cache_size": "128KB",
        "associativity": 16,
        "cache_line_size": 64,
        "access_latency_cycles": 23,
        "mshr_num_entries": 4096,  # 64,  # TODO: Cesar will update
        "L1": 0,
        "network_bw": coreNetBW,
        "debug_level": 6,
        "debug": memDebug,
    })
    l2.addParams(l2PrefetchParams)

    l2BusLink = sst.Link("l2cache_%d_link" % quad)
    l2BusLink.connect((l2, "high_network_0", busLat),
                      (quadBus, "low_network_0", busLat))

    l2NetLink = sst.Link("l2cache_%d_netlink" % quad)
    l2NetLink.connect((l2, "directory", netLat),
                      (rtr, "port%d" % (rtrPort), netLat))

    sst.popNamePrefix()
import sst sst.setProgramOption("timebase", "1ps") corecount = 8 prospero = {} for p in range(0, corecount): print("Creating prospero component core " + str(p)) prospero["prospero" + str(p)] = sst.Component("prospero" + str(p), "prospero.prospero") prospero["prospero" + str(p)].addParams({ "clock": "1GHz", "translateaddresses": "0", "trace": "/home/sdhammo/subversion/sst-simulator/sst/elements/ariel/pysdl/sort_omp/bsr_omp_algo4-" + str(p) + ".trace", "traceformat": "1", "outputlevel": "0", "timemultiplier": "0.00000000001" }) membus = sst.Component("membus", "memHierarchy.Bus") membus.addParams({ "numPorts": str(corecount + corecount + 2),
# Automatically generated SST Python input import sst # Define SST core options sst.setProgramOption("timebase", "1ps") sst.setProgramOption("stopAtCycle", "10000ns") # Define the simulation components comp_cpu = sst.Component("cpu", "memHierarchy.trivialCPU") comp_cpu.addParams({ "memSize" : "0x100000", "num_loadstore" : "10000", "commFreq" : "100", "do_write" : "1" }) comp_l1cache = sst.Component("l1cache", "memHierarchy.Cache") comp_l1cache.addParams({ "access_latency_cycles" : "5", "cache_frequency" : "2 Ghz", "replacement_policy" : "lru", "coherence_protocol" : "MSI", "associativity" : "4", "cache_line_size" : "64", "cache_size" : "4 KB", "L1" : "1", "debug" : "0" }) comp_l2cache = sst.Component("l2cache", "memHierarchy.Cache") comp_l2cache.addParams({ "access_latency_cycles" : "20", "cache_frequency" : "2 Ghz",
def build(self, nodeID, extraKeys):
    """Build the NIC, loopback and per-core Ember endpoints for one node.

    Creates a firefly.nic, a firefly.loopBack and one ember.EmberEngine per
    core, and links every endpoint to the NIC and to the loopback. When
    self.detailedModel is set and its build() returns a true value, a
    thornhill.MemoryHeap is created as well and the detailed-model links are
    attached to the NIC and the endpoints.

    The 'motifLog' driver parameter is passed to at most one endpoint of this
    node -- the first core, and only if nodeID appears in self.motifLogNodes;
    every other endpoint receives the driver parameters without it.

    nodeID    -- id of the node being built; used in component and link names.
    extraKeys -- additional parameters applied to the NIC after self.nicParams.

    Returns the (nic, "rtr", link latency) endpoint for the router connection.

    Written to run under both Python 2 and Python 3: the original used the
    Python-2-only print statement and xrange.
    """
    nic = sst.Component("nic" + str(nodeID), "firefly.nic")
    nic.addParams(self.nicParams)
    nic.addParams(extraKeys)
    nic.addParam("nid", nodeID)
    retval = (nic, "rtr", sst.merlin._params["link_lat"])

    built = False
    if self.detailedModel:
        built = self.detailedModel.build(nodeID, self.numCores)

    memory = None
    if built:
        nic.addLink(self.detailedModel.getNicLink(), "detailed0", "1ps")
        memory = sst.Component("memory" + str(nodeID), "thornhill.MemoryHeap")
        memory.addParam("nid", nodeID)
        #memory.addParam( "verboseLevel", 1 )

    loopBack = sst.Component("loopBack" + str(nodeID), "firefly.loopBack")
    loopBack.addParam("numCores", self.numCores)

    # A motif log was requested if the key is present and not the empty string.
    motifLogRequested = ('motifLog' in self.driverParams
                         and self.driverParams['motifLog'] != '')

    # Create a motifLog only for one core of the desired node(s)
    logCreatedforFirstCore = False

    for x in range(self.numCores):
        ep = sst.Component(
            "nic" + str(nodeID) + "core" + str(x) + "_EmberEP",
            "ember.EmberEngine")

        if built:
            links = self.detailedModel.getThreadLinks(x)
            for cpuNum, link in enumerate(links):
                ep.addLink(link, "detailed" + str(cpuNum), "1ps")

        # Create a motif log only for the desired list of nodes (endpoints);
        # strip the 'motifLog' parameter from every other endpoint.
        if not motifLogRequested:
            ep.addParams(self.driverParams)
        else:
            logHere = (not logCreatedforFirstCore
                       and any(nodeID == int(logNode)
                               for logNode in (self.motifLogNodes or ())))
            if logHere:
                logCreatedforFirstCore = True
                ep.addParams(self.driverParams)
            else:
                # Copy so that the shared driverParams keeps its 'motifLog'.
                tempParams = copy.copy(self.driverParams)
                del tempParams['motifLog']
                ep.addParams(tempParams)

        for statNode in self.statNodes:
            if nodeID == statNode:
                print("printStats for node {0}".format(statNode))
                ep.addParams({'motif1.printStats': 1})

        ep.addParams({
            'hermesParams.netId': nodeID,
            'hermesParams.netMapId': calcNetMapId(nodeID, self.nidList),
            'hermesParams.netMapSize': self.numNids,
            'hermesParams.coreId': x,
        })

        nicLink = sst.Link("nic" + str(nodeID) + "core" + str(x) + "_Link")
        nicLink.setNoCut()

        loopLink = sst.Link("loop" + str(nodeID) + "core" + str(x) + "_Link")
        loopLink.setNoCut()

        #ep.addLink(nicLink, "nic", self.nicParams["nic2host_lat"] )
        #nic.addLink(nicLink, "core" + str(x), self.nicParams["nic2host_lat"] )
        ep.addLink(nicLink, "nic", "1ns")
        nic.addLink(nicLink, "core" + str(x), "1ns")

        ep.addLink(loopLink, "loop", "1ns")
        loopBack.addLink(loopLink, "core" + str(x), "1ns")

        if built:
            memoryLink = sst.Link(
                "memory" + str(nodeID) + "core" + str(x) + "_Link")
            memoryLink.setNoCut()

            ep.addLink(memoryLink, "memoryHeap", "0 ps")
            memory.addLink(memoryLink, "detailed" + str(x), "0 ns")

    return retval
coreclock = "2.4GHz" uncoreclock = "1.4GHz" coherence = "MESI" network_bw = "60GB/s" DEBUG_IFACE = 0 DEBUG_L1 = 0 DEBUG_L2 = 0 DEBUG_L3 = 0 DEBUG_DIR = 0 DEBUG_MEM = 0 DEBUG_NOC = 0 verbose = 2 # Create merlin network - this is just simple single router comp_network = sst.Component("network", "merlin.hr_router") comp_network.addParams({ "xbar_bw": network_bw, "link_bw": network_bw, "input_buf_size": "2KiB", "num_ports": cores + caches + memories, "flit_size": "36B", "output_buf_size": "2KiB", "id": "0", "topology": "merlin.singlerouter" }) comp_network.setSubComponent("topology", "merlin.singlerouter") for x in range(cores): comp_cpu = sst.Component("cpu" + str(x), "memHierarchy.standardCPU") comp_cpu.addParams({
import sst

# Single distribution test component; every parameter is passed as a string.
d0_params = {
    "distrib": "exponential",
    "lambda": "0.5",
    "count": "100000000",
    "binresults": "1",
}

d0 = sst.Component("d0", "simpleElementExample.simpleDistribComponent")
d0.addParams(d0_params)
# Automatically generated SST Python input import sst import os # Define SST core options sst.setProgramOption("timebase", "1ps") sst.setProgramOption("stopAtCycle", "5s") # Define the simulation components comp_cpu = sst.Component("cpu", "prospero.prosperoCPU") comp_cpu.addParams({ "verbose": "0", "reader": "prospero.ProsperoTextTraceReader", "readerParams.file": "sstprospero-0-0.trace" }) comp_l1cache = sst.Component("l1cache", "memHierarchy.Cache") comp_l1cache.addParams({ "access_latency_cycles": "1", "cache_frequency": "2 Ghz", "replacement_policy": "lru", "coherence_protocol": "MESI", "associativity": "8", "cache_line_size": "64", "L1": "1", "cache_size": "64 KB" }) comp_memory = sst.Component("memory", "memHierarchy.MemController") comp_memory.addParams({ "coherence_protocol": "MESI", "backend.access_time": "1000 ns", "backend.mem_size": "4906",
# Automatically generated SST Python input #-- copied from sdl-1.py import sst # Define SST core options sst.setProgramOption("timebase", "1ps") sst.setProgramOption("stopAtCycle", "0 ns") # Define the simulation components comp_cpu = sst.Component("cpu", "memHierarchy.trivialCPU") comp_cpu.addParams({ "do_write": "1", "num_loadstore": "1000", "commFreq": "100", "memSize": "0x1000" }) comp_l1cache = sst.Component("l1cache", "memHierarchy.Cache") comp_l1cache.addParams({ "access_latency_cycles": "4", "cache_frequency": "2 Ghz", "replacement_policy": "lru", "coherence_protocol": "MSI", "associativity": "4", "cache_line_size": "64", #"debug" : "1", "debug_level": "10", "L1": "1", "LL": "1", "cache_size": "2 KB" }) comp_memory = sst.Component("memory", "memHierarchy.MemController")
# Automatically generated SST Python input import sst # Testing # Different simpleDRAM parameters from simpleDRAM tests # mru/lru/nmru cache replacement # Lower latencies # DelayBuffer backend # Define the simulation components comp_cpu0 = sst.Component("cpu0", "memHierarchy.trivialCPU") comp_cpu0.addParams({ "commFreq": "100", "rngseed": "1", "do_write": "1", "num_loadstore": "10000", "memSize": "0x100000", }) comp_c0_l1cache = sst.Component("c0.l1cache", "memHierarchy.Cache") comp_c0_l1cache.addParams({ "access_latency_cycles": "1", "cache_frequency": "2Ghz", "replacement_policy": "mru", "coherence_protocol": "MESI", "associativity": "4", "cache_line_size": "64", "cache_size": "4 KB", "L1": "1", "debug": "0" }) comp_cpu1 = sst.Component("cpu1", "memHierarchy.trivialCPU")
import sst # Define SST core options sst.setProgramOption("stopAtCycle", "10us") # Set up sender using slot and user subcomponents loader0 = sst.Component("Loader0", "simpleElementExample.SubComponentLoaderLegacy") loader0.addParam("clock", "1.5GHz") loader0.enableAllStatistics() sub0 = loader0.setSubComponent("mySubComp", "simpleElementExample.SubCompSlotLegacy", 0) sub0_0 = sub0.setSubComponent("mySubCompSlot", "simpleElementExample.SubCompSenderLegacy", 0) sub0_0.addParam("sendCount", 15) sub0_0.enableAllStatistics() sub0_1 = sub0.setSubComponent("mySubCompSlot", "simpleElementExample.SubCompSenderLegacy", 1) sub0_1.addParam("sendCount", 15) sub0_1.enableAllStatistics() # Set up receiver using slot and user subcomponent loader1 = sst.Component("Loader1", "simpleElementExample.SubComponentLoaderLegacy") loader1.addParam("clock", "1.0GHz") sub1 = loader1.setSubComponent("mySubComp", "simpleElementExample.SubCompSlotLegacy", 0)
import sst from mhlib import componentlist DEBUG_L1 = 0 DEBUG_L2 = 0 DEBUG_L3 = 0 DEBUG_L4 = 0 DEBUG_MEM = 0 DEBUG_CORE0 = 0 DEBUG_CORE1 = 0 # Core 0 + L1 cpu0 = sst.Component("cpu0", "memHierarchy.trivialCPU") cpu0.addParams({ "memSize": "0x1000", "num_loadstore": "1000", "commFreq": "100", "do_write": "1" }) iface0 = cpu0.setSubComponent("memory", "memHierarchy.memInterface") c0_l1cache = sst.Component("c0.l1cache", "memHierarchy.Cache") c0_l1cache.addParams({ "access_latency_cycles": "3", "cache_frequency": "2 Ghz", "replacement_policy": "lru", "coherence_protocol": "MSI", "associativity": "2", "cache_line_size": "64", "debug_level": "10", "L1": "1",
import sst
from sst.macro import *
import sst.test

# Settings shared by both dummy switches and by the link between them.
latency = "1us"
port = 0
comp1Id = 1
comp2Id = 2

# First dummy switch.
comp1 = sst.Component("1", "test.dummy_switch")
comp1.addParam("id", 1)
comp1.addParam("latency", latency)

# Second dummy switch.
comp2 = sst.Component("2", "test.dummy_switch")
comp2.addParam("id", 2)
comp2.addParam("latency", latency)

# Connect port 0 of each switch to the other.
makeBiNetworkLink(comp1, comp1Id, port,
                  comp2, comp2Id, port,
                  latency)
import sst # Define SST core options sst.setProgramOption("timebase", "1ps") sst.setProgramOption("stopAtCycle", "0 ns") # Tell SST what statistics handling we want sst.setStatisticLoadLevel(4) memory_mb = 1024 # Define the simulation components comp_cpu = sst.Component("cpu", "miranda.BaseCPU") comp_cpu.addParams({ "verbose": 1, "generator": "miranda.GUPSGenerator", "generatorParams.verbose": 0, "generatorParams.count": 10000, "generatorParams.max_address": ((memory_mb) / 2) * 1024 * 1024, }) # Enable statistics outputs comp_cpu.enableAllStatistics({"type": "sst.AccumulatorStatistic"}) comp_l1cache = sst.Component("l1cache", "memHierarchy.Cache") comp_l1cache.addParams({ "access_latency_cycles": "2",
"mem_size" : str(memory_capacity / (groups * memory_controllers_per_group)) + "MiB", } dc_params = { "coherence_protocol": coherence_protocol, "memNIC.network_bw": memory_network_bandwidth, "memNIC.interleave_size": str(mem_interleave_size) + "B", "memNIC.interleave_step": str((groups * memory_controllers_per_group) * mem_interleave_size) + "B", "entry_cache_size": 256*1024*1024, #Entry cache size of mem/blocksize "clock": memory_clock, "debug": 1, } print "Configuring Ariel processor model (" + str(groups * cores_per_group) + " cores)..." ariel = sst.Component("A0", "ariel.ariel") ariel.addParams({ "verbose" : "0", "maxcorequeue" : "256", "maxtranscore" : "16", "maxissuepercycle" : "2", "pipetimeout" : "0", "executable" : str(os.environ['OMP_EXE']), "appargcount" : "0", "arielinterceptcalls" : "1", "launchparamcount" : 1, "launchparam0" : "-ifeellucky", "arielmode" : "1", "corecount" : groups * cores_per_group, "clock" : str(clock) })
dataport1 = sst.Link("kRtr_data_port1_" + str(nodeId)) dataport1.connect((rtrdata, "local1", mesh_link_latency), dcdata) print("Building model...") # Build the mesh using Kingsley -> duplicate mesh for data & control kRtrReq = [] kRtrAck = [] kRtrFwd = [] kRtrData = [] for x in range(0, mesh_stops_x): for y in range(0, mesh_stops_y): nodeNum = len(kRtrReq) kRtrReq.append( sst.Component("krtr_req_" + str(nodeNum), "kingsley.noc_mesh")) kRtrReq[-1].addParams(ctrl_network_params) kRtrAck.append( sst.Component("krtr_ack_" + str(nodeNum), "kingsley.noc_mesh")) kRtrAck[-1].addParams(ctrl_network_params) kRtrFwd.append( sst.Component("krtr_fwd_" + str(nodeNum), "kingsley.noc_mesh")) kRtrFwd[-1].addParams(ctrl_network_params) kRtrData.append( sst.Component("krtr_data_" + str(nodeNum), "kingsley.noc_mesh")) kRtrData[-1].addParams(data_network_params) kRtrReq[-1].addParams({"local_ports": 2}) kRtrAck[-1].addParams({"local_ports": 2}) kRtrFwd[-1].addParams({"local_ports": 2}) kRtrData[-1].addParams({"local_ports": 2})
# Automatically generated SST Python input import sst # Test timingDRAM with transactionQ = reorderTransactionQ and AddrMapper=simpleAddrMapper and pagepolicy=simplePagePolicy(closed) # Define the simulation components cpu_params = { "clock": "3GHz", "do_write": 1, "num_loadstore": "5000", "memSize": "0x100000" } bus = sst.Component("bus", "memHierarchy.Bus") bus.addParams({"bus_frequency": "2Ghz"}) l3cache = sst.Component("l3cache", "memHierarchy.Cache") l3cache.addParams({ "access_latency_cycles": "30", "mshr_latency_cycles": 3, "cache_frequency": "2Ghz", "replacement_policy": "lru", "coherence_protocol": "MESI", "associativity": "16", "cache_line_size": "64", "cache_size": "64 KB", "debug": "0", "verbose": 2, "memNIC.network_bw": "25GB/s", })
import sst # Define SST core options sst.setProgramOption("stopAtCycle", "10us") # Set up senders using slot and user subcomponents loader0 = sst.Component("Loader0", "coreTestElement.SubComponentLoaderLegacy") loader0.addParam("clock", "1.5GHz") sub0 = loader0.setSubComponent("mySubComp", "coreTestElement.SubCompSlotLegacy", 0) sub0.addParam("sendCount", 15) sub0.addParam("unnamed_subcomponent", "coreTestElement.SubCompSenderLegacy") sub0.addParam("num_subcomps", "2") sub0.enableAllStatistics() # Set up receivers using slot and user subcomponents loader1 = sst.Component("Loader1", "coreTestElement.SubComponentLoaderLegacy") loader1.addParam("clock", "1.0GHz") sub1 = loader1.setSubComponent("mySubComp", "coreTestElement.SubCompSlotLegacy", 0) sub1.addParam("unnamed_subcomponent", "coreTestElement.SubCompReceiverLegacy") sub1.addParam("num_subcomps", "2") sub1.enableAllStatistics() # Set up links link0 = sst.Link("myLink0") link0.connect((sub0, "slot_port0", "5ns"), (sub1, "slot_port0", "5ns")) link1 = sst.Link("myLink1")
## Application Info: ## Executable -> exe_file ## appargcount -> Number of commandline arguments after <exec_file> name ## apparg<#> -> arguments ## Commandline execution for the below example would be ## /home/amdeshp/arch/benchmarks/PathFinder_1.0.0/PathFinder_ref/PathFinder.x -x /home/amdeshp/arch/benchmarks/PathFinder_1.0.0/generatedData/small1.adj_list ## AppArgs = ({ ## "executable" : "/home/amdeshp/arch/benchmarks/PathFinder_1.0.0/PathFinder_ref/PathFinder.x", ## "appargcount" : "0", ## "apparg0" : "-x", ## "apparg1" : "/home/amdeshp/arch/benchmarks/PathFinder_1.0.0/generatedData/small1.adj_list", ## }) ## Processor Model ariel = sst.Component("A0", "ariel.ariel") ## ariel.addParams(AppArgs) ariel.addParams({ "verbose": "1", "maxcorequeue": "256", "maxissuepercycle": "2", "pipetimeout": "0", "executable": "./stream", "memorylevels": "1", "arielinterceptcalls": "1", "launchparamcount": 1, "launchparam0": "-ifeellucky", "arielmode": "1", "corecount": corecount, "defaultlevel": defaultLevel, })