def __init__(self, env_name, num_episodes=100, alpha=0.9, gamma=0.9,
             epsilon=1e-2, model_loop=3, min_alpha=0.01, decay_freq=100):
    """
    Initialise the base learner with an epsilon-greedy policy, then create
    the model table and the history object used by this agent.

    :param model_loop: number of times using model to update Q-value
    """
    # Options forwarded by keyword to the parent constructor.
    base_options = dict(
        policy="epsilon_greedy",
        model_loop=model_loop,
        min_alpha=min_alpha,
        decay_freq=decay_freq,
    )
    super(DynaQ, self).__init__(
        env_name, num_episodes, alpha, gamma, epsilon, **base_options)

    # One model entry per (observation, action) index pair.
    self.m_table = Array2D(self.obs_size, self.action_size)
    self._history = Memory()
class DynaQ(TableBase):
    """
    Tabular Dyna-Q style learner.

    Each real environment step updates the Q-table directly, records the
    observed (reward, next state) in ``m_table``, and then replays
    ``model_loop`` previously seen (state, action) pairs from that table.

    ``env``, ``policy``, ``q_table``, ``alpha``, ``gamma``, ``min_alpha``,
    ``decay_freq``, ``model_loop``, ``obs_size`` and ``action_size`` are not
    defined here; presumably they are set up by ``TableBase``.
    """

    def __init__(self, env_name, num_episodes=100, alpha=0.9, gamma=0.9,
                 epsilon=1e-2, model_loop=3, min_alpha=0.01, decay_freq=100):
        """
        :param model_loop: number of times using model to update Q-value
        """
        super(DynaQ, self).__init__(env_name, num_episodes, alpha, gamma,
                                    epsilon, policy="epsilon_greedy",
                                    model_loop=model_loop,
                                    min_alpha=min_alpha,
                                    decay_freq=decay_freq)
        # Model table: (state, action) -> (reward, next state).
        self.m_table = Array2D(self.obs_size, self.action_size)
        # Called with each visited (state, action) pair and sampled from
        # during the model-based updates.
        self._history = Memory()

    def _loop(self):
        """
        Run one episode and return the sum of the rewards received.

        :return: total reward collected until the environment reports done
        """
        done = False
        total_reward = 0
        self.state = self.env.reset()
        while not done:
            action = self.policy()
            self._history((self.state, action))
            _state, reward, done, _info = self.env.step(action)

            # Direct update from the real transition.
            self.q_table[self.state, action] += self.alpha * (
                reward + self.gamma * self.q_table[_state].max()
                - self.q_table[self.state, action])
            self.m_table[self.state, action] = (reward, _state)

            # use model to update Q
            for _ in range(self.model_loop):
                s, a = self._history.sample()
                r, _s = self.m_table[s, a]
                self.q_table[s, a] += self.alpha * (
                    r + self.gamma * self.q_table[_s].max()
                    - self.q_table[s, a])

            total_reward += reward
            self.state = _state
        return total_reward

    def schedule_alpha(self, episode):
        """
        Decay ``alpha`` every ``decay_freq`` episodes.

        On a decay episode ``alpha`` is divided by ``episode / decay_freq``.
        The result is clamped so it never drops below ``min_alpha``; without
        the clamp a single division could overshoot the floor.

        :param episode: index of the current episode
        """
        is_decay_episode = episode != 0 and episode % self.decay_freq == 0
        if self.alpha > self.min_alpha and is_decay_episode:
            decayed = self.alpha / (episode / self.decay_freq)
            self.alpha = max(self.min_alpha, decayed)
class FileProcess(object):
    """
    Loads initial values described by tokenised input lines into a memory
    object.

    Each line is a sequence of string tokens. The first token names the
    destination (a hex memory address, ``PC``, or a register name) and the
    remaining tokens are hex values to store there.
    """

    # Kind of destination named by the first token of a line.
    IS_PC = 0
    IS_MEM = 1
    IS_REG = 2

    # Class-level default; every instance replaces it in __init__.
    mem = Memory()

    def __init__(self, mem):
        """
        :param mem: memory object that receives the loaded values
        """
        self.mem = mem

    def processLine(self, line):
        """
        Return the destination token of a line, or "#" for a comment line.

        :param line: sequence of string tokens; must not be empty
        :return: "#" if the first token starts with a pound symbol,
                 otherwise the first token with "[" and "]" removed
        """
        first = line[0]
        if first.startswith('#'):
            # Comment line: callers recognise the "#" marker and skip it.
            return "#"
        # replace() works on both Python 2 and 3, unlike translate(None, ...).
        return first.replace("[", "").replace("]", "")

    def getRealAddress(self, location):
        """
        Classify a destination token.

        :param location: destination token as returned by processLine
        :return: tuple (address_or_number, kind) where kind is IS_MEM for a
                 hex address, IS_PC for "PC" (address 0), and IS_REG for
                 anything else (number from mem.get_register_num)
        """
        try:
            return (int(location, 16), self.IS_MEM)  # hex address
        except (TypeError, ValueError):
            pass
        if location == "PC":
            return (0, self.IS_PC)
        return (self.mem.get_register_num(location), self.IS_REG)

    def process(self, file):
        """
        Apply every line of the input to the memory object.

        Comment lines and empty lines are skipped. Values are read from the
        second token onwards; reading of a line stops at the first token
        that is not a hex string.

        :param file: iterable of lines, each a sequence of string tokens
        """
        for line in file:
            if not line:
                continue
            location = self.processLine(line)
            if location == "#":
                continue
            base, kind = self.getRealAddress(location)

            for index, token in enumerate(line[1:]):
                try:
                    val = int(token, 16)
                except ValueError:
                    # Stop at the first value that is not a hex string.
                    break
                # Each value goes 4 further than the previous one
                # (presumably the word size in bytes -- confirm).
                address = base + 4 * index
                if kind == self.IS_MEM:
                    self.mem.set_val_to_address(val, address)
                elif kind == self.IS_PC:
                    self.mem.get_registers().set_initial_pc(val)
                elif kind == self.IS_REG:
                    self.mem.get_registers().set_value_for_register(
                        address, val)
def __init__(self):
    """
    Create the memory, program counter and stack pointer, then build the
    register and instruction sets together with their lookup tables.
    """
    self.memory = Memory(self.MEMORY_SIZE)

    # The program counter is a double component made of a low and high half.
    pc_low = Component("PL")
    pc_high = Component("PH")
    self.program_counter_low = pc_low
    self.program_counter_high = pc_high
    self.program_counter = DoubleComponent("P", pc_low, pc_high)

    # The stack pointer starts at the last memory index.
    stack_pointer = Component(self.STACK_POINTER)
    stack_pointer.set_contents(self.MEMORY_SIZE - 1)
    self.stack_pointer = stack_pointer

    # Registers must exist before the instructions that reference them.
    self.registers = self._define_registers()
    self.registers_by_name = dict(
        (register.name, register) for register in self.registers)

    self.instructions = self._define_instructions()
    self.instructions_by_opcode = dict(
        (instruction.opcode, instruction)
        for instruction in self.instructions)
    self.instructions_by_name = dict(
        (instruction.name, instruction)
        for instruction in self.instructions)
from syscall import Syscall
from commands import Command


def usage():
    """Print the command-line usage and terminate the process."""
    print("Usage: simulator [mode] [input file]")
    print("Modes available: -d (debug), -n (normal)")
    sys.exit(0)


def debug():
    """Print the debug-mode banner followed by an empty line."""
    print("Debug mode is ON!")
    print("")


if __name__ == "__main__":
    print("Welcome to Mips Simulator by Marcus Gabilheri")
    inputFile = []
    mem = Memory()
    fProcess = FileProcess(mem)

    # Exactly two arguments are expected: the mode flag and the input file.
    if len(sys.argv) != 3:
        usage()

    # NOTE(review): the assignments below rebind the module-level name
    # `debug`, so the debug() function above is no longer reachable by name.
    if sys.argv[1] == "-d":
        debug = True
    elif sys.argv[1] == "-n":
        debug = False
    else:
        usage()

    # Read every line up front; the context manager closes the file.
    with open(sys.argv[2]) as source:
        lines = [line.strip() for line in source]

    for l in lines:
        if not l:
            continue
        # Collapse each run of spaces into a single comma separator.
        l = re.sub(' +', ',', l)
def test_LDRV():
    """An LDRV instruction built over a 256-cell memory has LENGTH 2."""
    pc = Component("P")
    target_register = Component("A")
    ram = Memory(256)

    instruction = LDRV("LDAV", 1, ram, pc, [target_register])

    assert instruction.LENGTH == 2
def usage():
    """Print the command-line usage and terminate the process."""
    print("Usage: simulator [mode] [input file]")
    print("Modes available: -d (debug), -n (normal)")
    sys.exit(0)


def debug():
    """Print the debug-mode banner followed by an empty line."""
    print("Debug mode is ON!")
    print("")


if __name__ == "__main__":
    print("Welcome to Mips Simulator by Marcus Gabilheri")
    inputFile = []
    mem = Memory()
    fProcess = FileProcess(mem)

    # Exactly two arguments are expected: the mode flag and the input file.
    if len(sys.argv) != 3:
        usage()

    # NOTE(review): the assignments below rebind the module-level name
    # `debug`, so the debug() function above is no longer reachable by name.
    if sys.argv[1] == "-d":
        debug = True
    elif sys.argv[1] == "-n":
        debug = False
    else:
        usage()

    # Read every line up front; the context manager closes the file.
    with open(sys.argv[2]) as source:
        lines = [line.strip() for line in source]

    for l in lines:
        if not l:
            continue
        # Collapse each run of spaces into a single comma separator.
        l = re.sub(' +', ',', l)
class CPUTest(InstructionBase):
    """
    A small CPU model: a memory, a set of named registers and a table of
    instructions addressable by opcode or by name.

    It can run the program held in memory, disassemble memory into rows,
    and assemble a simple comma-separated source file into memory.
    """

    MEMORY_SIZE = 256
    PROGRAM_COUNTER = "P"  # register name of the program counter
    STACK_POINTER = "S"    # register name of the stack pointer

    def __init__(self):
        """Create memory, program counter, stack pointer, registers and
        instructions, plus the name/opcode lookup tables."""
        self.memory = Memory(self.MEMORY_SIZE)
        self.program_counter_low = Component("PL")
        self.program_counter_high = Component("PH")
        self.program_counter = DoubleComponent("P",
                                               self.program_counter_low,
                                               self.program_counter_high)
        self.stack_pointer = Component(self.STACK_POINTER)
        # The stack pointer starts at the last memory index.
        self.stack_pointer.set_contents(self.MEMORY_SIZE - 1)
        self.registers = self._define_registers()
        self.registers_by_name = {reg.name: reg for reg in self.registers}
        self.instructions = self._define_instructions()
        self.instructions_by_opcode = {
            ins.opcode: ins for ins in self.instructions
        }
        self.instructions_by_name = {
            ins.name: ins for ins in self.instructions
        }

    def _define_registers(self):
        """Create the register components as attributes and return them,
        together with the program counter and stack pointer, as a list."""
        self.A = Component("A")
        self.B = Component("B")
        self.D = Component("D")
        self.C = Component("C")
        self.R = Component("R")
        self.H = Component("H")
        self.L = Component("L")
        self.HL = DoubleComponent("HL", self.L, self.H)
        return [
            self.A,
            self.B,
            self.D,
            self.C,
            self.R,
            self.program_counter,
            self.stack_pointer,
            self.H,
            self.L,
            self.HL,
        ]

    def _define_instructions(self):
        """Return the list of instruction objects (name, opcode, memory,
        pointer component, and optional register operands)."""
        mem = self.memory
        pc = self.program_counter
        sp = self.stack_pointer
        A, B, C, D, R, HL = self.A, self.B, self.C, self.D, self.R, self.HL
        return [
            LDRV("LDAV", 1, mem, pc, [A]),
            LDRV("LDBV", 2, mem, pc, [B]),
            LDRR("LDAB", 3, mem, pc, [A, B]),
            LDRR("LDBA", 4, mem, pc, [B, A]),
            LDRM("LDAM", 5, mem, pc, [A]),
            LDRM("LDBM", 6, mem, pc, [B]),
            LDMR("LDMA", 7, mem, pc, [A]),
            LDMR("LDMB", 8, mem, pc, [B]),
            GTRR("GTAB", 9, mem, pc, [A, B, R]),
            GTRR("GTBA", 10, mem, pc, [B, A, R]),
            JMPV("JMPV", 11, mem, pc),
            JMPV("JMPRV", 12, mem, pc, [R]),
            INCR("INCA", 13, mem, pc, [A, C]),
            INCR("INCB", 14, mem, pc, [B, C]),
            ADDRR("ADDAB", 15, mem, pc, [A, B, C]),
            ADDRR("ADDBA", 16, mem, pc, [B, A, C]),
            # NOTE(review): opcode 170 breaks the otherwise sequential
            # numbering (17 is unused) -- possibly a typo; left unchanged.
            DECR("DECB", 170, mem, pc, [B, C]),
            SUBRR("SUBAB", 18, mem, pc, [A, B, C]),
            SUBRR("SUBBA", 19, mem, pc, [B, A, C]),
            LTRR("LTAB", 20, mem, pc, [A, B, R]),
            LTRR("LTBA", 21, mem, pc, [B, A, R]),
            EQRR("EQAB", 22, mem, pc, [A, B, R]),
            EQRR("EQBA", 23, mem, pc, [B, A, R]),
            JMNV("JMNRV", 24, mem, pc, [R]),
            JMPV("JMPCV", 25, mem, pc, [C]),
            JMNV("JMNCV", 26, mem, pc, [C]),
            LDIMRV("LDIMAV", 27, mem, pc, [A]),
            LDIMRV("LDIMBV", 28, mem, pc, [B]),
            LDIMRR("LDIMAB", 29, mem, pc, [A, B]),
            LDIMRR("LDIMBA", 30, mem, pc, [B, A]),
            LDRV("LDDV", 31, mem, pc, [D]),
            LDRR("LDDA", 32, mem, pc, [D, A]),
            LDRR("LDDB", 33, mem, pc, [D, B]),
            LDIMRV("LDIMDV", 34, mem, pc, [D]),
            LDIMRR("LDIMDA", 35, mem, pc, [D, A]),
            LDIMRR("LDIMDB", 36, mem, pc, [D, B]),
            INCR("INCD", 37, mem, pc, [D, C]),
            DECR("DECD", 38, mem, pc, [D, C]),
            ADDRR("ADDDA", 39, mem, pc, [D, A, C]),
            ADDRR("ADDDB", 40, mem, pc, [D, B, C]),
            GTRR("GTDB", 41, mem, pc, [D, B, R]),
            LDIRRM("LDIADM", 42, mem, pc, [A, D]),
            LDIRRM("LDIBDM", 43, mem, pc, [B, D]),
            PUSHR("PUSHA", 44, mem, sp, [A]),
            PUSHR("PUSHB", 45, mem, sp, [B]),
            PUSHR("PUSHD", 46, mem, sp, [D]),
            POPR("POPA", 47, mem, sp, [A]),
            POPR("POPB", 48, mem, sp, [B]),
            POPR("POPD", 49, mem, sp, [D]),
            GTRR("GTAD", 50, mem, pc, [A, D, R]),
            GTRR("GTBD", 51, mem, pc, [B, D, R]),
            JMPR("JMPA", 52, mem, pc, [A]),
            JMPR("JMRA", 53, mem, pc, [A, R]),
            PUSHR("PUSHP", 54, mem, sp, [pc]),
            LDRM("LDDM", 55, mem, pc, [D]),
            JMPR("JMPB", 56, mem, pc, [B]),
            DECR("DECA", 57, mem, pc, [A, C]),
            CALLV("CALLV", 58, mem, pc, None, sp),
            CALLV("CALRV", 59, mem, pc, [R], sp),
            CALLV("CALCV", 60, mem, pc, [C], sp),
            RET("RET", 61, mem, pc, None, sp),
            RET("RETR", 62, mem, pc, [R], sp),
            RET("RETC", 63, mem, pc, [C], sp),
            CALNV("CANRV", 64, mem, pc, [R], sp),
            CALNV("CANCV", 65, mem, pc, [C], sp),
            RETN("RETNR", 66, mem, pc, [R], sp),
            RETN("RETNC", 67, mem, pc, [C], sp),
            LDMR("LDMD", 68, mem, pc, [D]),
            # NOTE(review): named ADDHLA but the operand is register B --
            # confirm which was intended; left unchanged.
            ADDRR("ADDHLA", 69, mem, pc, [HL, B, C]),
            INCR("INCHL", 70, mem, pc, [HL, C]),
            DECR("DECHL", 71, mem, pc, [HL, C]),
        ]

    def _decimal_to_hex(self, decimal_int):
        """Return the hex digits of an int without the leading "0x"."""
        return hex(decimal_int)[2:]

    def _hex_to_decimal(self, hex_str):
        """Return the int value of a hexadecimal string."""
        return int(hex_str, 16)

    def _load_file_to_list(self, filename):
        """Return the lines of a text file with newline characters
        stripped from both ends."""
        with open(filename, 'r') as f:
            return [r.strip('\n') for r in f.readlines()]

    def run(self, debug=False):
        """
        Execute instructions until an opcode of 0 is fetched.

        Opcodes are fetched with get_memory_location_contents_and_inc_pc,
        which is not defined in this class (presumably inherited from
        InstructionBase). An opcode missing from the instruction table
        raises KeyError.

        :param debug: when true, print each executed instruction's name with
                      the state changes it caused; a program-counter change
                      equal to the instruction's LENGTH is not reported
        """
        instruction_count = 0
        old_state = {}
        print("Starting CPU execution at {}".format(
            self.program_counter.get_contents()))
        if debug:
            old_state = self.copy_current_state_values_into_dict()
        opcode = self.get_memory_location_contents_and_inc_pc()
        while opcode != 0:
            instruction = self.instructions_by_opcode[opcode]
            instruction.run()
            instruction_count += 1
            if debug:
                new_state = self.copy_current_state_values_into_dict()
                changes = self.compare_state_copies(old_state, new_state)
                if self.PROGRAM_COUNTER in changes:
                    pc_before, pc_after = changes[self.PROGRAM_COUNTER]
                    # Hide the plain advance past the instruction itself.
                    if pc_after - pc_before == instruction.LENGTH:
                        changes.pop(self.PROGRAM_COUNTER, None)
                print(instruction.name, changes)
                old_state = new_state
            opcode = self.get_memory_location_contents_and_inc_pc()
        print("Ending CPU execution at {}, instruction count {}".format(
            self.program_counter.get_contents(), instruction_count))

    def disassemble(self):
        """
        Walk memory from address 0 and return one row per location.

        A row is [address, contents]; when the contents is a known opcode
        the row also holds the instruction name followed by its operand
        values, and those operand addresses get no row of their own.
        Operands that would lie beyond the end of memory are omitted.
        """
        disassembly = []
        address = 0
        while address < self.MEMORY_SIZE:
            contents = self.memory.get_contents_value(address)
            current_row = [address, contents]
            if contents in self.instructions_by_opcode:
                instruction = self.instructions_by_opcode[contents]
                current_row.append(instruction.name)
                for _ in range(instruction.LENGTH - 1):
                    address += 1
                    if address >= self.MEMORY_SIZE:
                        # Do not read past the end of memory.
                        break
                    current_row.append(
                        self.memory.get_contents_value(address))
            disassembly.append(current_row)
            address += 1
        return disassembly

    def disassemble_to_file(self, filename="cpu.csv"):
        """Write the rows returned by disassemble() to a CSV file."""
        disassembly = self.disassemble()
        with open(filename, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(disassembly)

    def simple_assembler(self, filename):
        """
        Assemble a comma-separated source file and load it into memory.

        Spaces are removed from every row before it is split on commas.
        The first field decides the row type:

        * contains ';'  -- comment, ignored
        * contains '='  -- ``name=value`` definition (value is an int)
        * contains ':'  -- label; the field without its last character is
          bound to the current instruction offset
        * a known instruction name -- emits the opcode followed by the
          operands, each either a defined name/label or an int literal

        Rows whose first field is none of the above are ignored.

        :param filename: path of the source file
        :raises ValueError: if an instruction row does not have exactly
                            LENGTH fields, or an operand is neither a
                            defined name nor an int literal
        """
        rows = [
            row.replace(' ', '').split(',')
            for row in self._load_file_to_list(filename)
        ]

        # Pass 1: collect definitions and label offsets.
        variable_dict = {}
        line_count = 0
        for symbols in rows:
            head = symbols[0]
            if ';' in head:
                continue
            if '=' in head:
                variable_value = head.split('=')
                variable_dict[variable_value[0]] = int(variable_value[1])
                continue
            if ':' in head:
                variable_dict[head[:-1]] = line_count
                continue
            if head in self.instructions_by_name:
                instruction = self.instructions_by_name[head]
                if len(symbols) != instruction.LENGTH:
                    raise ValueError(
                        '{} length is {} but symbols are {}'.format(
                            instruction.name, instruction.LENGTH, symbols))
                line_count += instruction.LENGTH

        # Pass 2: emit opcodes and resolved operands.
        contents_list = []
        for symbols in rows:
            head = symbols[0]
            if ';' in head or '=' in head or ':' in head:
                continue
            if head in self.instructions_by_name:
                instruction = self.instructions_by_name[head]
                contents_list.append(int(instruction.opcode))
                for symbol in symbols[1:]:
                    contents_list.append(
                        int(variable_dict.get(symbol, symbol)))
        self.memory.load(contents_list)

    def get_all_registers_contents(self):
        """Return a dict mapping each register's name to its contents."""
        return {r.name: r.get_contents() for r in self.registers}

    def copy_current_state_values_into_dict(self):
        """Return a dict of register contents keyed by register name plus
        memory contents keyed by integer position in memory.dump()."""
        state_copy = self.get_all_registers_contents()
        for i, contents in enumerate(self.memory.dump()):
            state_copy[i] = contents
        return state_copy

    def compare_state_copies(self, old, new):
        """
        Return the entries whose value differs between two state dicts.

        :param old: earlier state dict
        :param new: later state dict; must contain every key of ``old``
        :return: dict mapping each changed key to (old value, new value)
        """
        return {
            component: (old[component], new[component])
            for component in old
            if old[component] != new[component]
        }