def read_file(self):
    """Build the simulated components from the JSON file at ``self.filename``.

    Reads the "Cache", "TLB", "MemoriaFisica" and "Programas" sections of the
    file and stores the resulting objects on the instance. The disk speeds,
    the page table size and the virtual address space size are all derived
    from the physical memory object rather than read from the file.
    """
    with open(self.filename, encoding='utf-8') as handle:
        config = json.load(handle)

    # Hardware components are built in the same order as the file sections.
    self.cache = Cache(*get_param_list(config["Cache"], CACHE_PARAMS))
    self.tlb = TLB(*get_param_list(config["TLB"], TLB_PARAMS))
    self.memory = Memory(*get_param_list(config["MemoriaFisica"], MEM_PARAMS))

    # The disk is modelled as ten times slower than memory in both directions.
    self.disk = Disk(self.memory.write_speed * 10, self.memory.read_speed * 10)

    # The virtual space is twice the physical one: twice the frames, twice
    # the bytes.
    self.page_table_size = self.memory.frames * 2
    self.virtual_size = self.memory.size * 2
    self.bits_address = int(log(self.virtual_size, 2))
    self.bits_page = int(log(self.page_table_size, 2))

    # Register every program under its id, with a fresh statistics record.
    program_fields = ["Nombre", "Secuencia"]
    for entry in config["Programas"]:
        loaded = Program(*get_param_list(entry, program_fields),
                         self.page_table_size)
        self.programs.append(loaded)
        self.program_map[loaded.id] = loaded
        self.stats[loaded.id] = ProgramStats()
def __init__(self, nLines=64, associativity=8, pageSize=0x1000, tlb=None, cache=None, hub=None):
    """Set up the eTLB entry array and the components it is wired to.

    Parameters
    ----------
    nLines (int): Total number of eTLB entries; they are split into
        nLines // associativity sets.
    associativity (int): Number of ways per set, -1 for fully associative
        (all nLines entries end up in a single set).
    pageSize (int): Page size in bytes. Passed to the default Hub and to
        every ETLBEntry, and used to derive the page-index bit count.
    tlb (TLB): Translation structure; when None, TLB(512, tagBits + setBits)
        is created.
    cache (Cache): Cache attached to this eTLB; when None, a
        Cache(size=0x8000, associativity=16) is created and given the
        access/tag time and energy figures below.
    hub (Hub): Hub attached to this eTLB; when None, a Hub with the same
        associativity and page size is created. Its eTLB attribute is
        always pointed back at this object.
    """
    def _bit_count(value):
        # Number of bits needed to index `value` distinct positions.
        return int(math.ceil(math.log2(value)))

    self.nLines = nLines
    self.pageSize = pageSize
    # -1 asks for a fully associative structure: one set holding every line.
    self.associativity = nLines if associativity == -1 else associativity

    if hub is None:
        hub = Hub(associativity=self.associativity, pageSize=self.pageSize)
    self.hub = hub

    if cache is None:
        # NOTE(review): the timing/energy values are assumed to apply only to
        # this default cache, not to a caller-supplied one -- confirm.
        cache = Cache(size=0x8000, associativity=16)
        cache.accessEnergy = 0.0111033
        cache.accessTime = 4
        cache.tagTime = 1
        cache.tagEnergy = 0.000539962
    self.cache = cache

    self.hub.eTLB = self

    # Address layout, low to high: offset | page index | set index | tag.
    self.cacheLine = self.cache.cacheLine
    self.nSets = self.nLines // self.associativity
    self.offsetBits = _bit_count(self.cacheLine)
    self.wayBits = _bit_count(self.associativity)
    self.pageBits = _bit_count(self.pageSize) - self.offsetBits
    self.setBits = _bit_count(self.nSets)
    # The tag is whatever remains of a 48-bit address.
    self.tagBits = 48 - self.setBits - self.pageBits - self.offsetBits

    if tlb is None:
        tlb = TLB(512, self.tagBits + self.setBits)
    self.tlb = tlb

    # Every way of every set starts out free.
    self.freeList = [list(range(self.associativity)) for _ in range(self.nSets)]
    self.counter = 0

    self.entries = []
    for _ in range(self.nSets):
        ways = []
        for _ in range(self.associativity):
            ways.append(ETLBEntry(self.pageSize, self.cacheLine))
        self.entries.append(ways)

    # Hit counters indexed by location: DRAM, L1I, L1D, L2. L1 is a unified
    # cache at present; the split slots exist for forward compatibility.
    self.hit = [0, 0, 0, 0]
    self.miss = 0
    self.cycles = 0
    self.energy = 0.
def _read_int(prompt, default):
    """Prompt for an integer, returning `default` when the answer is blank.

    A non-blank answer that is not an integer still raises ValueError, as the
    original inline int() conversion did.
    """
    answer = input(prompt).strip()
    if not answer:
        return default
    return int(answer)


def main():
    """Ask for the simulator settings, build the components and run the menu.

    Each prompt shows a recommended value in brackets. Pressing Enter on a
    prompt selects that value instead of failing on int('').

    Raises:
        ValueError: if a non-blank answer to a size prompt is not an integer.
    """
    print("\n[.] Initializing parameters/settings for simulator...")
    print("[.] Values in brackets represent reccommended/tested values.")
    print("[.] Using untested values may result in unstable behavior.\n")

    # Ask for parameters. Debugging stays off unless explicitly requested;
    # "Yes" keeps working, and "yes" / "y" are accepted as well.
    user_debug = input("[?] Enable debugging information [No]: ")
    debug = user_debug.strip().lower() in ("y", "yes")

    memory_size = _read_int("[?] Size of main memory (bytes) [100]: ", 100)
    virtual_memory_size = _read_int(
        "[?] Size of virtual memory (bytes) [8000]: ", 8000)
    cache_size = _read_int("[?] Size of cache (bytes)[40]: ", 40)
    block_size = _read_int("[?] Size of cache blocks (bytes)[4]: ", 4)
    page_size = _read_int("[?] Size of disk pages (bytes)[32]: ", 32)
    table_size = _read_int(
        "[?] Number of TLB table entries (bytes)[10]: ", 10)

    # Initialize components with bus and debug flag
    bus = Bus(debug)
    cpu = CPU(debug)
    cache = Cache(cache_size, block_size, debug)
    tlb = TLB(table_size, debug)
    memory = Memory(memory_size, virtual_memory_size, debug)
    disk = Disk(page_size, debug)

    # Initialize GUI
    menu = GUI(bus, cpu, cache, tlb, memory, disk, debug)
    menu.menu_loop()
class Simulation:
    """Drives a set of programs through a TLB / page table / cache / memory /
    disk model and collects per-program statistics.

    Typical use: construct, call read_file(), call simulate(), then
    write_output().
    """

    def __init__(self, filename, print_to):
        """Store the configuration path and create empty simulation state.

        Args:
            filename: Path of the JSON configuration read by read_file().
            print_to: Truthy to dump every component's table after each access.
        """
        self.filename = filename
        # Components; all are created by read_file().
        self.cache = None
        self.tlb = None
        self.memory = None
        self.disk = None
        # Queue of programs that still have accesses left (set by simulate()).
        self.running = None
        self.page_table_size = None
        self.virtual_size = None
        self.programs = []
        # Program currently scheduled and its statistics record.
        self.curr_program = None
        self.curr_stats = None
        # Program id -> Program, and program id -> ProgramStats.
        self.program_map = {}
        self.stats = {}
        # Width of a virtual address and of its page-number prefix, in bits.
        self.bits_address = None
        self.bits_page = None
        self.print = print_to

    @staticmethod
    def _ratio(hits, total):
        """Return hits / total, or 0.0 when there were no accesses at all."""
        return hits / total if total else 0.0

    def read_file(self):
        """Populate the simulation attributes from the JSON configuration."""
        with open(self.filename, encoding='utf-8') as file:
            json_file = json.load(file)

        cache_params = get_param_list(json_file["Cache"], CACHE_PARAMS)
        self.cache = Cache(*cache_params)

        tlb_params = get_param_list(json_file["TLB"], TLB_PARAMS)
        self.tlb = TLB(*tlb_params)

        memory_params = get_param_list(json_file["MemoriaFisica"], MEM_PARAMS)
        self.memory = Memory(*memory_params)

        # The disk is modelled as ten times slower than memory.
        disk_write = self.memory.write_speed * 10
        disk_read = self.memory.read_speed * 10
        self.disk = Disk(disk_write, disk_read)

        # The virtual address space is twice the physical one.
        self.page_table_size = self.memory.frames * 2
        self.virtual_size = self.memory.size * 2
        self.bits_address = int(log(self.virtual_size, 2))
        self.bits_page = int(log(self.page_table_size, 2))

        for program_dic in json_file["Programas"]:
            program_parameters = get_param_list(
                program_dic, ["Nombre", "Secuencia"])
            new_program = Program(*program_parameters, self.page_table_size)
            self.programs.append(new_program)
            self.program_map[new_program.id] = new_program
            self.stats[new_program.id] = ProgramStats()

    def simulate(self):
        """Run every program round-robin until none has accesses left.

        A program keeps the processor until next_direction() returns -1 or it
        runs out of accesses; if it still has accesses it is re-queued.
        """
        self.running = deque(self.programs)
        while self.running:
            program = self.running.popleft()
            self.switch_context(program)

            next_access = self.curr_program.next_direction()
            has_directions = True
            while next_access != -1 and has_directions:
                self.process_access(next_access)
                if self.curr_program.has_next_direction():
                    next_access = self.curr_program.next_direction()
                else:
                    has_directions = False

            if self.curr_program.has_next_direction():
                self.running.append(self.curr_program)

    def switch_context(self, program):
        """Make `program` the running one and invalidate the TLB entries.

        The cache is deliberately left untouched on a context switch.
        """
        self.curr_program = program
        self.curr_stats = self.stats[program.id]
        self.tlb.invalidate_entries()

    def process_access(self, virtual_access):
        """Translate one virtual address, access memory through the cache and
        optionally print the state of every component."""
        memory_address = self.process_virtual_access(virtual_access)
        self.process_memory_access(memory_address)
        if self.print:
            print("\n" * 3)
            self.tlb.print_table()
            self.curr_program.print_page_table()
            self.cache.print_table()
            self.memory.print_table(self.program_map)
            self.disk.print_table(self.program_map)

    def process_virtual_access(self, virtual_access):
        """Translate a virtual address of the running program.

        Tries the TLB, then the program's page table, and finally handles a
        page fault (swapping pages in and out as needed). Statistics of the
        running program are updated along the way.

        Returns:
            The physical address, as a decimal integer.
        """
        program = self.curr_program
        binary_address = decimal_to_binary(virtual_access, self.bits_address)
        page_decimal = binary_to_decimal(binary_address[:self.bits_page])
        offset_decimal = binary_to_decimal(binary_address[self.bits_page:])

        # 1) TLB hit.
        if self.tlb.read(page_decimal):
            self.curr_stats.mark_tlb_hit()
            frame = self.tlb.get_element(page_decimal)
            return self.memory_address_from_frame_offset(frame, offset_decimal)

        # 2) TLB miss, but the page table already maps the page.
        self.curr_stats.mark_tlb_miss()
        page_table = program.page_table
        if page_table.map_present(page_decimal):
            self.curr_stats.mark_page_hit()
            mapped_frame = page_table.get_frame(page_decimal)
            self.tlb.write(page_decimal, mapped_frame)
            self.curr_stats.mark_tlb_write()
            return self.memory_address_from_frame_offset(
                mapped_frame, offset_decimal)

        # 3) Page fault.
        self.curr_stats.mark_page_fault()
        if page_table.is_in_disk(page_decimal):
            self.curr_stats.mark_swap_in()
            # Pages are pushed to disk under the program id (see
            # store_in_disk), so they must be popped with the same key.
            self.disk.pop(program.id, page_decimal)
            page_table.mark_not_disk(page_decimal)

        # Bring the page into memory; `previous` describes the evicted page,
        # if any, as a (program id, page) pair.
        frame, previous = self.memory.request_allocation(
            program.id, page_decimal)
        self.curr_stats.mark_memory_write()
        if previous is not None:
            # Cached addresses of the reused frame are no longer valid.
            self.cache_invalidate_frame(frame)
            pid, program_page = previous
            prev_program = self.program_map[pid]
            prev_program.page_table.invalidate(frame)
            # NOTE(review): if the evicted page belongs to the running
            # program, the TLB may still hold its old mapping -- confirm
            # whether a targeted TLB invalidation is needed here.
            self.curr_stats.mark_swap_out()
            self.store_in_disk(pid, program_page)

        # Record the new mapping in the page table and the TLB.
        page_table.add_entry(page_decimal, frame)
        self.tlb.write(page_decimal, frame)
        self.curr_stats.mark_tlb_write()
        return self.memory_address_from_frame_offset(frame, offset_decimal)

    def cache_invalidate_frame(self, frame):
        """Invalidate the cache for the address range covered by `frame`."""
        min_range = self.memory.page_size * frame
        max_range = self.memory.page_size * (frame + 1)
        self.cache.invalidate_range(min_range, max_range)

    def process_memory_access(self, memory_access):
        """Access a physical address through the cache, updating statistics."""
        # Physical addresses are one bit narrower than virtual ones.
        memory_access_bin = decimal_to_binary(memory_access,
                                              self.bits_address - 1)
        if self.cache.read(memory_access_bin):
            self.curr_stats.mark_cache_hit()
            return

        # Miss: read from memory and fill the cache.
        self.curr_stats.mark_cache_miss()
        self.curr_stats.mark_memory_read()
        self.cache.write(memory_access_bin)
        self.curr_stats.mark_cache_write()

    def memory_address_from_frame_offset(self, frame, offset):
        """Concatenate frame and offset bits and return the decimal address."""
        frame_bin = decimal_to_binary(frame, self.bits_page)
        offset_bin = decimal_to_binary(offset,
                                       self.bits_address - self.bits_page)
        return binary_to_decimal(frame_bin + offset_bin)

    def store_in_disk(self, pid, program_page):
        """Perform a swap out: flag the page as on disk and push it there."""
        program_out = self.program_map[pid]
        program_out.page_table.mark_disk(program_page)
        self.disk.push(pid, program_page)

    def write_output(self, output_file):
        """Write one CSV line of statistics per program to `output_file`.

        No header row is written. Columns, in order:
        Programa,HRT,HRC,PF,SO,SI,ST,TT,CT,MT. The two hit rates are 0.0 for
        a program that performed no accesses.
        """
        with open(output_file, "w", encoding="utf-8") as file:
            for program in self.programs:
                stats = self.stats[program.id]
                tlb_read = stats.tlb_hit_count + stats.tlb_miss_count
                cache_read = stats.cache_hit_count + stats.cache_miss_count
                hrt = self._ratio(stats.tlb_hit_count, tlb_read)
                hrc = self._ratio(stats.cache_hit_count, cache_read)
                pf = stats.page_fault
                so = stats.swap_out_count
                si = stats.swap_in_count
                # Swap time: a swap out reads memory and writes disk; a swap
                # in reads disk and writes memory.
                st = (so * (self.disk.write_speed + self.memory.read_speed)
                      + si * (self.disk.read_speed + self.memory.write_speed))
                tt = (tlb_read * self.tlb.read_speed
                      + stats.tlb_write_count * self.tlb.write_speed)
                ct = (cache_read * self.cache.read_speed
                      + stats.cache_write_count * self.cache.write_speed)
                mt = (stats.memory_read_count * self.memory.read_speed
                      + stats.memory_write_count * self.memory.write_speed)
                line_list = [hrt, hrc, pf, so, si, st, tt, ct, mt]
                line = ",".join(
                    [program.name] + [str(x) for x in line_list]) + "\n"
                file.write(line)
def __init__(self, component_id, component_name,
             stat_dict, config_dict, sim_dict, ruby, core_i=0):
    """Build the core component and its sub-components.

    Every entry of ``self.parameters`` and ``self.stats`` is a two-item list
    ``[value, description]``; values are stored as strings. The tables are
    first created with defaults and then filled in from the dictionaries.

    Args:
        component_id: Identifier of this component; also the prefix of the
            sub-component ids (".predictor", ".itlb", ...).
        component_name: Name stored on the component.
        stat_dict: Statistics; each value is indexed with [1] to obtain the
            number. # presumably keys are gem5 stat names -- confirm.
        config_dict: Configuration values, converted with int().
        sim_dict: Must provide "frequency" (indexed by core_i) and "voltage";
            passed on unchanged to every sub-component.
        ruby: Not used by this constructor.
        core_i: Index into sim_dict["frequency"] for this core.
    """
    def cfg(key):
        # Integer configuration value for `key`.
        return int(config_dict[key])

    def stat(key):
        # Integer value of the statistic `key`.
        return int(stat_dict[key][1])

    self.name = "core"
    self.id = "core"

    self.parameters = {
        "clock_rate": ["1000", "Clock Rate in MHz"],
        "vdd": ["0", "0 means using ITRS default vdd"],
        "power_gating_vcc": [
            "-1", "\"-1\" means using default power gating virtual power"
            "supply voltage constrained by technology and computed"
            "automatically"],
        "opt_local": [
            "0", "for cores with unknown timing, set to 0 to"
            "force off the opt flag"],
        "instruction_length": ["32", ""],
        "opcode_width": ["16", ""],
        "x86": ["1", ""],
        "micro_opcode_width": ["8", ""],
        "machine_type": [
            "0", "Specifies the machine type inorder/OoO; 1"
            "inorder; 0 OOO"],
        "number_hardware_threads": [
            "2", "number_instruction_fetch_ports(icache ports) is always 1"
            "in single-thread processor, it only may be more than one in"
            "SMT processors. BTB ports always equals to fetch ports since"
            "branch information in consecutive branch instructions in the"
            "same fetch group can be read out from BTB once."
            "(cpu.numThreads)"],
        "fetch_width": ["16", "(cpu.fetchWidth)"],
        "number_instruction_fetch_ports": [
            "1", "fetch_width determines the size of cachelines of L1"
            "cache block"],
        "decode_width": [
            "16", "decode_width determines the number of ports of the"
            "renaming table (both RAM and CAM) scheme (cpu.decodeWidth)"],
        "issue_width": ["16", "(cpu.issueWidth)"],
        "peak_issue_width": [
            "16", "issue_width determines the number of ports of Issue"
            "window and other logic as in the complexity effective"
            "processors paper; issue_width==dispatch_width"
            "(cpu.issueWidth)"],
        "commit_width": [
            "16", "commit_width determines the number of ports of register"
            "files (cpu.commitWidth)"],
        "fp_issue_width": [
            "2", "Issue width of the Floating Poing Unit"],
        "prediction_width": [
            "1", "number of branch instructions can be predicted"
            "simultaneously"],
        "pipelines_per_core": [
            "1,1", "Current version of McPAT does not distinguish int and"
            "floating point pipelines. Theses parameters are reserved for"
            "future use. integer_pipeline and floating_pipelines, if the"
            "floating_pipelines is 0, then the pipeline is shared"],
        "pipeline_depth": [
            "31,31", "pipeline depth of int and fp, if pipeline is shared,"
            "the second number is the average cycles of fp ops issue and"
            "exe unit"],
        "ALU_per_core": [
            "6", "contains an adder, a shifter, and a logical unit"],
        "MUL_per_core": ["1", "For MUL and Div"],
        "FPU_per_core": ["2", "buffer between IF and ID stage"],
        "instruction_buffer_size": [
            "32", "buffer between ID and sche/exe stage"],
        "decoded_stream_buffer_size": ["16", ""],
        "instruction_window_scheme": [
            "0", "0 PHYREG based, 1 RSBASED. McPAT support 2 types of OoO"
            "cores, RS based and physical reg based"],
        "instruction_window_size": ["32", "(cpu.numIQEntries)"],
        "fp_instruction_window_size": [
            "32", "The instruction issue Q as in Alpha 21264; The RS as in"
            "Intel P6 (cpu.numIQEntries)"],
        "ROB_size": [
            "32", "Each in-flight instruction has an entry in ROB"
            "(cpu.numROBEntries)"],
        "archi_Regs_IRF_size": [
            "16", "Number of Architectural Integer General Purpose"
            "Registers specified by the ISA: X86-64 has 16GPR"],
        "archi_Regs_FRF_size": [
            "32", "Number of Architectural Registers specified by the ISA:"
            "MMX + XMM"],
        "phy_Regs_IRF_size": [
            "32", "Number of Physical Integer Registers (cpu.numPhysIntRegs)"],
        "phy_Regs_FRF_size": [
            "32", "Number of Physical FP Registers (cpu.numPhysFloatRegs)"],
        "rename_scheme": [
            "0", "can be RAM based(0) or CAM based(1) rename scheme "
            "RAM-based scheme will have free list, status table; CAM-based "
            "scheme have the valid bit in the data field of the CAM"],
        "checkpoint_depth": [
            "0", "RAM and CAM RAT contains checkpoints, checkpoint_depth=# "
            "of in_flight speculations; RAM-based RAT should not have more "
            "than 4 GCs (e.g., MIPS R10000). McPAT assumes the exsistance "
            "of RRAT when the RAM-RAT having no GCs (e.g., Netburst) "
            "CAM-based RAT should have at least 1 GC and can have more than "
            "8 GCs."],
        "register_windows_size": [
            "0", "How many windows in the windowed register file, sun "
            "processors; no register windowing is used when this number is "
            "0. In OoO cores, loads and stores can be issued whether "
            "inorder(Pentium Pro) or (OoO)out-of-order(Alpha), They will "
            "always try to execute out-of-order though."],
        "LSU_order": ["inorder", "Load/Store Unit (LSU) Ordering"],
        "store_buffer_size": ["16", "Store Queue Entries (cpu.SQEntries)"],
        "load_buffer_size": [
            "16", "By default, in-order cores do not have load buffers "
            "(cpu.LQEntries)"],
        "memory_ports": [
            "2", "max_allowed_in_flight_memo_instructions determines the # "
            "of ports of load and store buffer as well as the ports of "
            "Dcache which is connected to LSU. Dual-pumped Dcache can be "
            "used to save the extra read/write ports. Number of ports refer "
            "to sustain-able concurrent memory accesses"],
        "RAS_size": ["2", "Branch Predictor RAS Size"],
        "number_of_BPT": ["2", "Number of Branch Predictor Tables (BPT)"],
        "number_of_BTB": ["2", "Number of Branch Target Buffers (BTB)"],
    }

    self.stats = {
        "total_instructions": ["0", "cpu.iq.iqInstsIssued"],
        "int_instructions": [
            "0", "iq.FU_type_0::No_OpClass + iq.FU_type_0::IntAlu +"
            "iq.FU_type_0::IntMult + iq.FU_type_0::IntDiv +"
            "iq.FU_type_0::IprAccess"],
        "fp_instructions": [
            "0", "cpu.iq.FU_type_0::FloatAdd + cpu.iq.FU_type_0::FloatCmp"
            "+ cpu.iq.FU_type_0::FloatCvt + cpu.iq.FU_type_0::FloatMult +"
            "cpu.iq.FU_type_0::FloatDiv + cpu.iq.FU_type_0::FloatSqrt"],
        "branch_instructions": ["0", "cpu.branchPred.condPredicted"],
        "branch_mispredictions": ["0", "cpu.branchPred.condIncorrect"],
        "load_instructions": [
            "0", "cpu.iq.FU_type_0::MemRead + cpu.iq.FU_type_0::InstPrefetch"],
        "store_instructions": ["0", "cpu.iq.FU_type_0::MemWrite"],
        "committed_instructions": ["0", "cpu.commit.committedOps"],
        "committed_int_instructions": ["0", "cpu.commit.int_insts"],
        "committed_fp_instructions": ["0", "cpu.commit.fp_insts"],
        "pipeline_duty_cycle": [
            "1", "<=1, runtime_ipc/peak_ipc; averaged for all cores if"
            "homogeneous"],
        "total_cycles": ["1", "cpu.numCycles"],
        "idle_cycles": ["0", "cpu.num_idle_cycles"],
        "busy_cycles": ["1", "cpu.numCycles - cpu.num_idle_cycles"],
        "ROB_reads": ["0", "cpu.rob.rob_reads"],
        "ROB_writes": ["0", "cpu.rob.rob_writes"],
        "rename_reads": [
            "0", "lookup in renaming logic (cpu.rename.int_rename_lookups)"],
        "rename_writes": [
            "0", "cpu.rename.RenamedOperands * "
            "cpu.rename.int_rename_lookups / cpu.rename.RenameLookups"],
        "fp_rename_reads": ["0", "cpu.rename.fp_rename_lookups"],
        "fp_rename_writes": [
            "0", "cpu.rename.RenamedOperands * "
            "cpu.rename.fp_rename_lookups / cpu.rename.RenameLookups"],
        "inst_window_reads": ["0", "cpu.iq.int_inst_queue_reads"],
        "inst_window_writes": ["0", "cpu.iq.int_inst_queue_writes"],
        "inst_window_wakeup_accesses": [
            "0", "cpu.iq.int_inst_queue_wakeup_accesses"],
        "fp_inst_window_reads": ["0", "cpu.iq.fp_inst_queue_reads"],
        "fp_inst_window_writes": ["0", "cpu.iq.fp_inst_queue_writes"],
        "fp_inst_window_wakeup_accesses": [
            "0", "cpu.iq.fp_inst_queue_wakeup_accesses"],
        "int_regfile_reads": ["0", "cpu.int_regfile_reads"],
        "float_regfile_reads": ["0", "cpu.fp_regfile_reads"],
        "int_regfile_writes": ["1", "cpu.int_regfile_writes"],
        "float_regfile_writes": ["1", "cpu.fp_regfile_writes"],
        "function_calls": ["0", "cpu.commit.function_calls"],
        "context_switches": ["0", "cpu.workload.num_syscalls"],
        "ialu_accesses": ["1", "cpu.iq.int_alu_accesses"],
        "fpu_accesses": ["1", "cpu.iq.fp_alu_accesses"],
        "mul_accesses": ["1", "cpu.iq.fu_full::FloatMult"],
        "cdb_alu_accesses": ["1", "cpu.iq.int_alu_accesses"],
        "cdb_mul_accesses": ["1", "cpu.iq.fp_alu_accesses"],
        "cdb_fpu_accesses": ["1", "cpu.iq.fp_alu_accesses"],
        "IFU_duty_cycle": ["0.25", ""],
        "LSU_duty_cycle": ["0.25", ""],
        "MemManU_I_duty_cycle": ["0.25", ""],
        "MemManU_D_duty_cycle": ["0.25", ""],
        "ALU_duty_cycle": ["1", ""],
        "MUL_duty_cycle": ["0.3", ""],
        "FPU_duty_cycle": ["0.3", ""],
        "ALU_cdb_duty_cycle": ["1", ""],
        "MUL_cdb_duty_cycle": ["0.3", ""],
        "FPU_cdb_duty_cycle": ["0.3", ""],
    }

    self.predictor = None
    self.itlb = None
    self.icache = None
    self.dtlb = None
    self.dcache = None
    self.btb = None

    self.name = component_name
    self.id = component_id

    # Init the Directory Parameters and Stats:
    params = self.parameters
    params["clock_rate"][0] = str(float(sim_dict["frequency"][core_i]))
    params["vdd"][0] = str(float(sim_dict["voltage"]))
    params["power_gating_vcc"][0] = "-1"
    params["opt_local"][0] = "0"
    params["instruction_length"][0] = "32"
    params["opcode_width"][0] = "16"
    params["x86"][0] = "1"
    params["micro_opcode_width"][0] = "8"
    params["machine_type"][0] = "0"
    params["number_hardware_threads"][0] = str(cfg("numThreads"))
    params["fetch_width"][0] = str(cfg("fetchWidth"))
    params["number_instruction_fetch_ports"][0] = "1"
    params["decode_width"][0] = str(cfg("decodeWidth"))
    params["issue_width"][0] = str(cfg("issueWidth"))
    params["peak_issue_width"][0] = str(cfg("issueWidth"))
    params["commit_width"][0] = str(cfg("commitWidth"))
    params["fp_issue_width"][0] = str(cfg("fuPool.FUList2.count"))
    params["prediction_width"][0] = str(cfg("branchPred.numThreads"))
    params["pipelines_per_core"][0] = "1,1"
    params["pipeline_depth"][0] = "31,31"
    params["ALU_per_core"][0] = str(cfg("fuPool.FUList0.count"))
    params["MUL_per_core"][0] = str(cfg("fuPool.FUList1.count"))
    params["FPU_per_core"][0] = str(
        cfg("fuPool.FUList2.count") + cfg("fuPool.FUList3.count"))
    params["instruction_buffer_size"][0] = str(cfg("fetchBufferSize"))
    params["decoded_stream_buffer_size"][0] = "16"
    params["instruction_window_scheme"][0] = "0"
    params["instruction_window_size"][0] = str(cfg("numIQEntries"))
    params["fp_instruction_window_size"][0] = str(cfg("numIQEntries"))
    params["ROB_size"][0] = str(cfg("numROBEntries"))
    params["archi_Regs_IRF_size"][0] = "16"
    params["archi_Regs_FRF_size"][0] = "32"
    params["phy_Regs_IRF_size"][0] = str(cfg("numPhysIntRegs"))
    params["phy_Regs_FRF_size"][0] = str(cfg("numPhysFloatRegs"))
    params["rename_scheme"][0] = "0"
    params["checkpoint_depth"][0] = "0"
    params["register_windows_size"][0] = "0"
    params["LSU_order"][0] = "inorder"
    params["store_buffer_size"][0] = str(cfg("SQEntries"))
    # The load buffer is sized by the load queue (LQEntries), not the store
    # queue; SQEntries is only a fallback for configs without LQEntries.
    params["load_buffer_size"][0] = str(
        int(config_dict.get("LQEntries", config_dict["SQEntries"])))
    params["memory_ports"][0] = "2"
    params["RAS_size"][0] = str(cfg("branchPred.RASSize"))
    params["number_of_BPT"][0] = str(cfg("numThreads"))
    params["number_of_BTB"][0] = str(cfg("numThreads"))

    stats = self.stats
    # The trailing +1 keeps the instruction counts from ever being zero.
    stats["total_instructions"][0] = str(stat("iq.iqInstsIssued") + 1)
    stats["int_instructions"][0] = str(
        stat("iq.FU_type_0::No_OpClass")
        + stat("iq.FU_type_0::IntAlu")
        + stat("iq.FU_type_0::IntMult")
        + stat("iq.FU_type_0::IntDiv")
        + stat("iq.FU_type_0::IprAccess") + 1)
    stats["fp_instructions"][0] = str(
        stat("iq.FU_type_0::FloatAdd")
        + stat("iq.FU_type_0::FloatCmp")
        + stat("iq.FU_type_0::FloatCvt")
        + stat("iq.FU_type_0::FloatMult")
        + stat("iq.FU_type_0::FloatDiv")
        + stat("iq.FU_type_0::FloatSqrt"))
    stats["branch_instructions"][0] = str(stat("branchPred.condPredicted"))
    stats["branch_mispredictions"][0] = str(stat("branchPred.condIncorrect"))
    stats["load_instructions"][0] = str(
        stat("iq.FU_type_0::MemRead") + stat("iq.FU_type_0::InstPrefetch"))
    stats["store_instructions"][0] = str(stat("iq.FU_type_0::MemWrite"))
    stats["committed_instructions"][0] = str(stat("commit.committedOps"))
    stats["committed_int_instructions"][0] = str(stat("commit.int_insts"))
    stats["committed_fp_instructions"][0] = str(stat("commit.fp_insts"))
    # Duty cycle and cycle counts are fixed constants here rather than being
    # read from stat_dict (ipc_total / numCycles / idleCycles).
    stats["pipeline_duty_cycle"][0] = str(float(1.0))
    stats["total_cycles"][0] = "1"
    stats["idle_cycles"][0] = "0"
    stats["busy_cycles"][0] = "1"
    stats["ROB_reads"][0] = str(stat("rob.rob_reads"))
    stats["ROB_writes"][0] = str(stat("rob.rob_writes"))
    stats["rename_reads"][0] = str(stat("rename.int_rename_lookups"))
    # The 1+ in the denominator avoids dividing by zero.
    stats["rename_writes"][0] = str(
        stat("rename.RenamedOperands")
        * stat("rename.int_rename_lookups")
        / (1 + stat("rename.RenameLookups")))
    stats["fp_rename_reads"][0] = str(stat("rename.fp_rename_lookups"))
    stats["fp_rename_writes"][0] = str(
        stat("rename.RenamedOperands")
        * stat("rename.fp_rename_lookups")
        / (1 + stat("rename.RenameLookups")))
    stats["inst_window_reads"][0] = str(stat("iq.int_inst_queue_reads"))
    stats["inst_window_writes"][0] = str(stat("iq.int_inst_queue_writes"))
    stats["inst_window_wakeup_accesses"][0] = str(
        stat("iq.int_inst_queue_wakeup_accesses"))
    stats["fp_inst_window_reads"][0] = str(stat("iq.fp_inst_queue_reads"))
    stats["fp_inst_window_writes"][0] = str(stat("iq.fp_inst_queue_writes"))
    stats["fp_inst_window_wakeup_accesses"][0] = str(
        stat("iq.fp_inst_queue_wakeup_accesses"))
    stats["int_regfile_reads"][0] = str(stat("int_regfile_reads"))
    stats["float_regfile_reads"][0] = str(stat("fp_regfile_reads"))
    stats["int_regfile_writes"][0] = str(stat("int_regfile_writes"))
    stats["float_regfile_writes"][0] = str(stat("fp_regfile_writes"))
    stats["function_calls"][0] = str(stat("commit.function_calls"))
    # The syscall count is optional in stat_dict; default to 0 when absent.
    stats["context_switches"][0] = str(
        stat("workload.numSyscalls")
        if "workload.numSyscalls" in stat_dict else 0)
    stats["ialu_accesses"][0] = str(stat("iq.int_alu_accesses"))
    stats["fpu_accesses"][0] = str(stat("iq.fp_alu_accesses"))
    stats["mul_accesses"][0] = str(stat("iq.fu_full::FloatMult"))
    stats["cdb_alu_accesses"][0] = str(stat("iq.int_alu_accesses"))
    stats["cdb_mul_accesses"][0] = str(stat("iq.fp_alu_accesses"))
    stats["cdb_fpu_accesses"][0] = str(stat("iq.fp_alu_accesses"))
    stats["IFU_duty_cycle"][0] = "0.25"
    stats["LSU_duty_cycle"][0] = "0.25"
    stats["MemManU_I_duty_cycle"][0] = "0.25"
    stats["MemManU_D_duty_cycle"][0] = "0.25"
    stats["ALU_duty_cycle"][0] = "1"
    stats["MUL_duty_cycle"][0] = "0.3"
    stats["FPU_duty_cycle"][0] = "0.3"
    stats["ALU_cdb_duty_cycle"][0] = "1"
    stats["MUL_cdb_duty_cycle"][0] = "0.3"
    stats["FPU_cdb_duty_cycle"][0] = "0.3"

    # Sub-components each receive the slice of the stat/config dictionaries
    # selected by prune_dict for their prefix.
    self.predictor = Predictor(
        self.id + ".predictor",
        "PBT",
        prune_dict("branchPred.", stat_dict),
        prune_dict("branchPred.", config_dict, "0"),
        sim_dict,
    )
    self.itlb = TLB(
        self.id + ".itlb",
        "itlb",
        prune_dict("itb_walker_cache.", stat_dict),
        prune_dict("itb.", config_dict, "0"),
        sim_dict,
    )
    self.icache = ICache(
        self.id + ".icache",
        "icache",
        prune_dict("icache.", stat_dict),
        prune_dict("icache.", config_dict, "0"),
        sim_dict,
    )
    self.dtlb = TLB(
        self.id + ".dtlb",
        "dtlb",
        prune_dict("dtb_walker_cache.", stat_dict),
        prune_dict("dtb.", config_dict, "0"),
        sim_dict,
    )
    self.dcache = DCache(
        self.id + ".dcache",
        "dcache",
        prune_dict("dcache.", stat_dict),
        prune_dict("dcache.", config_dict, "0"),
        sim_dict,
    )
    self.btb = BTB(
        self.id + ".BTB",
        "BTB",
        prune_dict("branchPred.", stat_dict),
        prune_dict("branchPred.", config_dict, "0"),
        sim_dict,
    )
class ETLB:
    """Extended TLB (eTLB) in front of a tag-less L1 cache and a Hub.

    Every eTLB entry stores a virtual tag, the translated physical page
    number (``paddr``) and a cache location table (CLT).  For each cache line
    of the page the CLT holds a location code and a way:

    ====  =========================================
    code  meaning
    ====  =========================================
    0     not cached (DRAM)
    1     L1I (unused while L1 is unified)
    2     L1D (the unified L1)
    3     L2 (``hub.cache``)
    ====  =========================================

    The Hub keeps one entry per physical page; ``eTLBValid``/``eTLBPointer``
    on a hub entry say whether (and where) an eTLB entry currently owns the
    active copy of that page's CLT.
    """

    def __init__(self, nLines=64, associativity=8, pageSize=0x1000, tlb=None, cache=None, hub=None):
        """Build the eTLB and, where not supplied, its TLB, L1 cache and Hub.

        Parameters
        ----------
        nLines (int): Number of eTLB entries. (Default 64)
        associativity (int): Number of ways of the eTLB, -1 for fully
            associative (i.e. ``nLines`` ways).
        pageSize (int): Page size in bytes. (Default 0x1000)
        tlb (TLB): Backing TLB used on an eTLB miss. Default is None, which
            creates ``TLB(512, tagBits + setBits)``.
        cache (Cache): The L1 data array. Default is None, which creates a
            32 kB, 16-way ``Cache``.
        hub (Hub): The hub behind this eTLB. Default is None, which creates
            a ``Hub`` with the same associativity and page size.
        """
        self.nLines = nLines
        self.associativity = associativity
        self.hub = hub
        self.cache = cache
        self.tlb = tlb
        self.pageSize = pageSize
        if self.associativity == -1:
            self.associativity = self.nLines
        if self.hub is None:
            self.hub = Hub(associativity=self.associativity, pageSize=self.pageSize)
        if self.cache is None:
            self.cache = Cache(size=0x8000, associativity=16)
            # NOTE(review): these timing/energy overrides are assumed to
            # describe only the default L1 built above -- confirm they are
            # not meant to be applied to a caller-supplied cache as well.
            self.cache.accessEnergy = 0.0111033
            self.cache.accessTime = 4
            self.cache.tagTime = 1
            self.cache.tagEnergy = 0.000539962
        # Back-pointer so the hub can reach this eTLB.
        self.hub.eTLB = self
        self.cacheLine = self.cache.cacheLine
        self.nSets = self.nLines // self.associativity
        # Address layout, low to high: offset | pageIndex | setIndex | tag.
        self.offsetBits = int(math.ceil(math.log2(self.cacheLine)))
        self.wayBits = int(math.ceil(math.log2(self.associativity)))
        self.pageBits = int(math.ceil(math.log2(self.pageSize))) - self.offsetBits
        self.setBits = int(math.ceil(math.log2(self.nSets)))
        # 48 is the address width the tag is sized against.
        self.tagBits = 48 - self.setBits - self.pageBits - self.offsetBits
        if self.tlb is None:
            self.tlb = TLB(512, self.tagBits + self.setBits)
        # Per set: ways that currently hold no page.
        self.freeList = [list(range(self.associativity)) for _ in range(self.nSets)]
        # Monotonic access counter used as the LRU timestamp.
        self.counter = 0
        self.entries = [
            [ETLBEntry(self.pageSize, self.cacheLine) for _ in range(self.associativity)]
            for _ in range(self.nSets)
        ]
        # DRAM, L1I, L1D, L2.  Note: L1 is actually a unified cache at
        # present; the split is kept for forward compatibility if separate
        # caches are ever implemented.
        self.hit = [0, 0, 0, 0]
        self.miss = 0
        self.cycles = 0
        self.energy = 0.

    def _findHubWay(self, hubSet, eTLBPointer, requireValid=False):
        """Return the hub way in `hubSet` whose entry points at `eTLBPointer`.

        An entry whose ``eTLBValid`` flag is set is preferred, because a hub
        entry keeps its old ``eTLBPointer`` after its eTLB entry was evicted
        and would otherwise shadow the live entry of the page that took over
        the same eTLB slot.  If no live entry matches, the first entry with a
        matching pointer is returned.  Returns -1 when nothing matches.

        Parameters
        ----------
        hubSet (int): Hub set to search.
        eTLBPointer (int): Encoded eTLB slot, ``(way << setBits) + set``.
        requireValid (bool): Only consider hub entries whose ``valid`` flag
            is set.
        """
        firstMatch = -1
        for hubWay in range(self.hub.associativity):
            hubEntry = self.hub.entries[hubSet][hubWay]
            if requireValid and not hubEntry.valid:
                continue
            if hubEntry.eTLBPointer != eTLBPointer:
                continue
            if hubEntry.eTLBValid:
                return hubWay
            if firstMatch == -1:
                firstMatch = hubWay
        return firstMatch

    def _allocateL1Line(self, address, countEnergy):
        """Reserve an L1 way for the line at `address`; return ``(set, way)``.

        Evicts an L1 line to L2 first when the target set has no free way
        (step 4), then takes a free way and charges the fill access (step 5).
        """
        L1Set = (address >> (self.offsetBits + self.pageBits)) % self.cache.nSets
        if not self.cache.freeList[L1Set]:
            self.evictCache(L1Set, countEnergy=countEnergy)
        L1Way = self.cache.freeList[L1Set].pop()
        self.cache.accessDirect(L1Set, L1Way, countTime=False, countEnergy=countEnergy)
        return L1Set, L1Way

    def access(self, address, write=False, count=True, countTime=None, countEnergy=None):
        """Access a given address.

        On an eTLB hit the CLT says where the line lives; lines found in DRAM
        or L2 are moved into L1.  On an eTLB miss an entry is (re)filled from
        the TLB and the Hub and the access is replayed once without counting
        hit/miss or time.

        Parameters
        ----------
        address (int): The address which is accessed.
        write (bool): True if the access is a write, False for a read
            (default read). Only forwarded to the hub at present.
        count (bool): Whether hit/miss rate should be counted (default is
            True).
        countTime (bool): Whether access time is charged; defaults to
            `count`.
        countEnergy (bool): Whether access energy is charged; defaults to
            `count`.

        Raises
        ------
        ValueError: If the CLT holds a location code outside 0..3.
        """
        # Step 1 in fig2/3d
        offset = address % self.cacheLine
        pageIndex = (address >> self.offsetBits) % (1 << self.pageBits)
        setIndex = (address >> (self.offsetBits + self.pageBits)) % self.nSets
        tag = address >> (self.setBits + self.pageBits + self.offsetBits)
        if countTime is None:
            countTime = count
        if countEnergy is None:
            countEnergy = count

        # eTLB Hit
        hit = False
        for i, entry in enumerate(self.entries[setIndex]):
            if not (entry.valid and entry.vtag == tag):
                continue
            hit = True
            loc = entry.location[pageIndex]
            lineWay = entry.way[pageIndex]
            if count:
                self.hit[loc] += 1

            # Not in cache fig2c
            if loc == 0:
                # Access to DRAM not simulated, send to CPU (step 2/3)
                # Evict from L1 (step 4), place data (step 5)
                L1Set, L1Way = self._allocateL1Line(address, countEnergy)
                # Update Hub pointer (step 5)
                etlbPointer = (i << self.setBits) + setIndex
                hubSet = entry.paddr % self.hub.nSets
                hubWay = self._findHubWay(hubSet, etlbPointer, requireValid=True)
                if hubWay == -1:
                    hubWay = 0  # no owning hub entry found; keep the historical default
                self.cache.tags[L1Set][L1Way] = (hubWay << self.hub.setBits) + hubSet
                # Update the CLT (step 6)
                entry.location[pageIndex] = 2  # L1D (unified L1)
                entry.way[pageIndex] = L1Way

            # In L1 (data and instruction caches unified, this needs to be
            # split if those are split) fig2a
            elif loc == 1 or loc == 2:
                # access the L1 cache entry, send to CPU (step 2/3)
                # NOTE(review): this set index is line-granular, while lines
                # are placed in L1 using the page-granular index of
                # _allocateL1Line -- confirm which of the two is intended.
                cacheSetIndex = (address >> self.offsetBits) % self.cache.nSets
                self.cache.accessDirect(cacheSetIndex, lineWay, countTime=countTime, countEnergy=countEnergy)

            # In L2 fig2b
            elif loc == 3:
                # access the L2 cache entry, send to CPU (step 2/3)
                cacheSetIndex = entry.paddr % self.hub.cache.nSets
                self.hub.cache.accessDirect(cacheSetIndex, lineWay, countTime=countTime, countEnergy=countEnergy)
                # Evict from L1 (step 4), place data (step 5)
                L1Set, L1Way = self._allocateL1Line(address, countEnergy)
                # The hub pointer travels with the line from L2 to L1.
                self.cache.tags[L1Set][L1Way] = self.hub.cache.tags[cacheSetIndex][lineWay]
                # Update the CLT (step 6)
                entry.location[pageIndex] = 2  # L1D (unified L1)
                entry.way[pageIndex] = L1Way
                # Free the L2 entry so it can be used again (only one copy,
                # which is now in L1)
                self.hub.cache.evict(cacheSetIndex, lineWay, countEnergy=countEnergy)

            # Invalid location
            else:
                raise ValueError("Location in CLT is invalid, expected 2 bit int, got %d" % loc)

            etlbWay = i
            break

        # eTLB Miss fig3d
        if not hit:
            if count:
                self.miss += 1
            # Evict if necessary (step 2)
            if not self.freeList[setIndex]:
                self.evict(setIndex)
            etlbWay = self.freeList[setIndex].pop()
            entry = self.entries[setIndex][etlbWay]
            # Update the virtual and physical address, calling the TLB (step 3)
            entry.vtag = tag
            entry.paddr = self.tlb.translateVirt((tag << self.setBits) + setIndex)
            addr = (((entry.paddr << self.pageBits) + pageIndex) << self.offsetBits) + offset
            # access the Hub (step 4)
            hubEntry = self.hub.access(addr, write=write, count=count, countEnergy=countEnergy, countTime=countTime)
            # Copy the CLT (step 5)
            entry.way = hubEntry.way.copy()
            entry.location = hubEntry.location.copy()
            entry.valid = True
            # Update the eTLBPointer, and Valid bit (step 6)
            hubEntry.eTLBValid = True
            hubEntry.eTLBPointer = (etlbWay << self.setBits) + setIndex
            # Replay the access, which now hits the freshly filled entry.
            # NOTE(review): energy is charged here even when the caller
            # passed countEnergy=False -- confirm this is intended.
            self.access(address, write, count=False, countEnergy=True, countTime=False)

        self.counter += 1
        self.entries[setIndex][etlbWay].lastAccess = self.counter

    def evict(self, setNumber, way=None):
        """Evict (i.e. add to the free list) an eTLB entry.

        If `way` is None, LRU replacement policy is used among occupied
        entries and the victim's CLT is written back to its hub entry, whose
        ``eTLBValid`` flag is cleared.  If `way` is an integer, that way is
        only added to the free list; no write-back is done.

        Parameters
        ----------
        setNumber (int): eTLB set to evict from.
        way (int): Way to free, or None to pick the LRU victim.

        Returns
        -------
        int: The way that was freed.

        Raises
        ------
        RuntimeError: If no hub entry points at the LRU victim.
        """
        freeWays = self.freeList[setNumber]
        if way is not None:
            if way not in freeWays:
                freeWays.append(way)
            return way

        # LRU victim: way 0 is the starting candidate, replaced by any
        # occupied way with a strictly older timestamp.
        candidates = self.entries[setNumber]
        way = 0
        minAccess = candidates[0].lastAccess
        for i, candidate in enumerate(candidates):
            if i not in freeWays and candidate.lastAccess < minAccess:
                way = i
                minAccess = candidate.lastAccess
        entry = candidates[way]

        # Hand the active CLT back to the hub entry that owns this page.
        eTLBPointer = (way << self.setBits) + setNumber
        hubSet = entry.paddr % self.hub.nSets
        hubWay = self._findHubWay(hubSet, eTLBPointer)
        if hubWay == -1:
            raise RuntimeError("Entry not in hub when expected")
        hubEntry = self.hub.entries[hubSet][hubWay]
        hubEntry.location = entry.location.copy()
        hubEntry.way = entry.way.copy()
        hubEntry.eTLBValid = False

        if way not in freeWays:
            freeWays.append(way)
        return way

    def evictCache(self, setNumber, way=None, countEnergy=True):
        """Move one L1 line to L2 and free its L1 way (fig3f).

        Parameters
        ----------
        setNumber (int): L1 set to evict from.
        way (int): L1 way to evict, or None to let the L1 cache select the
            victim.
        countEnergy (bool): Whether the accesses involved are charged.
        """
        # Select a victim, access its hub pointer (step 1)
        if way is None:
            way = self.cache.selectEviction(setNumber)
        hubPointer = self.cache.tags[setNumber][way]
        # Find the set (step 2)
        L2Set = hubPointer % self.hub.cache.nSets
        # If needed, evict a line (step 3)
        if not self.hub.cache.freeList[L2Set]:
            self.hub.evictCache(L2Set, countEnergy=countEnergy)
        # Move the data/hub pointer (step 4)
        L2Way = self.hub.cache.freeList[L2Set].pop()
        self.hub.cache.accessDirect(L2Set, L2Way, countTime=False, countEnergy=countEnergy)
        self.hub.cache.tags[L2Set][L2Way] = hubPointer
        # Update the active CLT (step 5): the eTLB entry's copy while the
        # page is resident in the eTLB, otherwise the hub entry's own copy.
        hubSet = hubPointer % self.hub.nSets
        hubWay = hubPointer >> self.hub.setBits
        hubEntry = self.hub.entries[hubSet][hubWay]
        if hubEntry.eTLBValid:
            etlbSet = hubEntry.eTLBPointer % self.nSets
            etlbWay = hubEntry.eTLBPointer >> self.setBits
            clt = self.entries[etlbSet][etlbWay]
        else:
            clt = hubEntry
        for pageIndex in range(clt.nEntries):
            # The victim is the line recorded as "in L1" at the evicted way.
            if clt.location[pageIndex] == 2 and clt.way[pageIndex] == way:
                clt.location[pageIndex] = 3  # L2
                clt.way[pageIndex] = L2Way
        # Actually evict
        self.cache.evict(setNumber, way, countEnergy=countEnergy)