def second_pass():
    """Translate every command of the input program and write the result.

    The program named by ``sys.argv[1]`` is parsed again and one line is
    written to ``output_file`` per command:

    * C-commands become ``111`` followed by the comp, dest and jump fields.
    * A-commands with an integer operand are converted with ``binary_word``.
    * A-commands naming a known symbol use the address stored in the table.
    * A-commands naming an unknown symbol define a new variable; variables
      are numbered consecutively starting at 16.
    * L-commands write an empty string (no output line).
    """
    parser = Parser(sys.argv[1])
    next_variable = 16
    with open(output_file, 'w') as out:
        while parser.has_more_commands():
            parser.advance()
            kind = parser.command_type()
            line = ''
            if kind == 'C_COMMAND':
                comp_bits = code_trans.comp(parser.comp())
                dest_bits = code_trans.dest(parser.dest())
                jump_bits = code_trans.jump(parser.jump())
                line = '111' + comp_bits + dest_bits + jump_bits + '\n'
            elif kind == 'A_COMMAND':
                operand = parser.symbol()
                if is_integer(operand):
                    line = binary_word(operand) + '\n'
                elif SymbolTable.contains(operand):
                    line = SymbolTable.get_address(operand) + '\n'
                else:
                    # First sighting of a variable: the table stores the
                    # already-encoded word, which is also what gets written.
                    encoded = binary_word(next_variable)
                    SymbolTable.add_entry(operand, encoded)
                    line = encoded + '\n'
                    next_variable += 1
            out.write(line)
def main():
    """Assemble the ``.asm`` file named on the command line into a ``.hack`` file.

    The output file has the same path as the input with its extension
    replaced by ``.hack``.  Assembly is done in two passes over the source:

    1. Every label pseudo-command is recorded in the symbol table together
       with the address of the instruction that follows it.
    2. Each A- and C-command is encoded as a 16-character binary line.
       Symbols not yet in the table are registered with ``add_variable``.

    Raises:
        ValueError: if the operand of an A-command is matched by neither
            group of ``symbol_pattern``.
    """
    parser = argparse.ArgumentParser(
        description='Assemble a Hack .asm file into a .hack file.')
    parser.add_argument('asm_file', type=str, help='asm file')
    args = parser.parse_args()
    asm_file = args.asm_file
    save_file = os.path.splitext(asm_file)[0] + ".hack"

    st = SymbolTable()

    # First pass: only labels are recorded; A/C commands advance the address.
    with HackParser(asm_file) as hp:
        op_address = 0
        while hp.advance() is not None:
            cmd_type = hp.command_type()
            if cmd_type in (A_COMMAND, C_COMMAND):
                op_address += 1
            elif cmd_type == L_COMMAND:
                st.add_entry(hp.symbol(), op_address)

    # Second pass: emit one line per real instruction.
    with HackParser(asm_file) as hp:
        with open(save_file, 'w') as wf:
            while hp.advance() is not None:
                cmd_type = hp.command_type()
                if cmd_type == A_COMMAND:
                    symbol = hp.symbol()
                    m = symbol_pattern.match(symbol)
                    if m is None:
                        raise ValueError(
                            'invalid A-command operand: %r' % (symbol,))
                    if m.group(1):  # @value
                        address = int(m.group(1))
                    elif m.group(2):  # @symbol
                        symbol = m.group(2)
                        if not st.contains(symbol):
                            st.add_variable(symbol)
                        address = st.get_address(symbol)
                    else:
                        # Previously this case silently re-emitted the
                        # preceding instruction (or hit an unbound name).
                        raise ValueError(
                            'invalid A-command operand: %r' % (symbol,))
                    bincode = "0" + int2bin(address, 15)
                elif cmd_type == C_COMMAND:
                    bincode = ('111' + code_writer.comp(hp.comp()) +
                               code_writer.dest(hp.dest()) +
                               code_writer.jump(hp.jump()))
                else:
                    # Labels (and anything unrecognised) produce no output.
                    continue
                wf.write(bincode + '\n')
def first_pass():
    """Record the address of every label in the program.

    The program named by ``sys.argv[1]`` is scanned once.  A- and C-commands
    each advance the instruction counter by one; an L-command whose symbol
    is not yet in ``SymbolTable`` is stored with the ``binary_word`` encoding
    of the current counter value.
    """
    parser = Parser(sys.argv[1])
    rom_address = 0
    while parser.has_more_commands():
        parser.advance()
        kind = parser.command_type()
        if kind in ('C_COMMAND', 'A_COMMAND'):
            rom_address += 1
            continue
        if kind == 'L_COMMAND':
            label = parser.symbol()
            if not SymbolTable.contains(label):
                SymbolTable.add_entry(label, binary_word(rom_address))
def generate_symbols(file):
    """Build a symbol table holding the address of every label in *file*.

    Args:
        file: path of the assembly source to scan.

    Returns:
        A ``SymbolTable`` in which each label symbol is mapped to the number
        of A/C instructions that precede it.  A label that is already in the
        table is left untouched.
    """
    symbol_table = SymbolTable()
    instruction_number = 0
    with open(file) as f:
        parser = Parser(f)
        while parser.has_more_commands():
            # Compare with ``==``: ``is`` tests object identity, which only
            # happens to work for strings when the interpreter interns them.
            command_type = parser.command_type()
            if command_type == 'L_COMMAND':
                symbol = parser.symbol()
                if not symbol_table.contains(symbol):
                    symbol_table.add_entry(symbol, instruction_number)
            elif command_type in ('A_COMMAND', 'C_COMMAND'):
                instruction_number += 1
            parser.advance()
    return symbol_table
def main():
    """Assemble the ``.asm`` file named on the command line into a ``.hack`` file.

    The output file has the same path as the input with its extension
    replaced by ``.hack``.  Assembly is done in two passes over the source:

    1. Every label pseudo-command is recorded in the symbol table together
       with the address of the instruction that follows it.
    2. Each A- and C-command is encoded as a 16-character binary line.
       Symbols not yet in the table are registered with ``add_variable``.

    Raises:
        ValueError: if the operand of an A-command is matched by neither
            group of ``symbol_pattern``.
    """
    parser = argparse.ArgumentParser(
        description='Assemble a Hack .asm file into a .hack file.')
    parser.add_argument('asm_file', type=str, help='asm file')
    args = parser.parse_args()
    asm_file = args.asm_file
    save_file = os.path.splitext(asm_file)[0] + ".hack"

    st = SymbolTable()

    # First pass: only labels are recorded; A/C commands advance the address.
    with HackParser(asm_file) as hp:
        op_address = 0
        while hp.advance() is not None:
            cmd_type = hp.command_type()
            if cmd_type in (A_COMMAND, C_COMMAND):
                op_address += 1
            elif cmd_type == L_COMMAND:
                st.add_entry(hp.symbol(), op_address)

    # Second pass: emit one line per real instruction.
    with HackParser(asm_file) as hp:
        with open(save_file, 'w') as wf:
            while hp.advance() is not None:
                cmd_type = hp.command_type()
                if cmd_type == A_COMMAND:
                    symbol = hp.symbol()
                    m = symbol_pattern.match(symbol)
                    if m is None:
                        raise ValueError(
                            'invalid A-command operand: %r' % (symbol,))
                    if m.group(1):  # @value
                        address = int(m.group(1))
                    elif m.group(2):  # @symbol
                        symbol = m.group(2)
                        if not st.contains(symbol):
                            st.add_variable(symbol)
                        address = st.get_address(symbol)
                    else:
                        # Previously this case silently re-emitted the
                        # preceding instruction (or hit an unbound name).
                        raise ValueError(
                            'invalid A-command operand: %r' % (symbol,))
                    bincode = "0" + int2bin(address, 15)
                elif cmd_type == C_COMMAND:
                    bincode = ('111' + code_writer.comp(hp.comp()) +
                               code_writer.dest(hp.dest()) +
                               code_writer.jump(hp.jump()))
                else:
                    # Labels (and anything unrecognised) produce no output.
                    continue
                wf.write(bincode + '\n')
def main():
    """Assemble ``<name>.asm`` (given as ``sys.argv[1]``) into ``<name>.hack``.

    Labels are expected to have been put into the symbol table by
    ``first_iter``.  This function then walks the program once more,
    encoding C-commands directly and resolving A-command operands:
    integer literals are used as-is, while symbols are looked up in the
    table and, when missing, allocated consecutive addresses from 16 up.
    """
    # Function-scope import so this block does not depend on the file header.
    import os

    # splitext keeps directory parts that contain dots ("./prog.asm",
    # "v1.2/prog.asm"); splitting on the first '.' truncated those paths.
    filename = os.path.splitext(sys.argv[1])[0]
    symbol_table = SymbolTable()
    first_iter(symbol_table)
    parser = Parser(filename)
    code = Code()
    output = []
    address = 16
    try:
        while parser.has_more_commands():
            parser.advance()
            command_type = parser.command_type()
            if command_type == 'C_COMMAND':
                dest = parser.dest()
                comp = parser.comp()
                jump = parser.jump()
                output.append("111" + code.comp(comp) + code.dest(dest) +
                              code.jump(jump))
                continue
            if command_type == 'L_COMMAND':
                # Label pseudo-commands generate no machine code.
                continue
            symbol = parser.symbol()
            try:
                symbol_address = int(symbol)
            except ValueError:
                # Not a literal: resolve it, allocating a variable if new.
                if not symbol_table.contains(symbol):
                    symbol_table.add_entry(symbol, address)
                    address += 1
                symbol_address = symbol_table.get_address(symbol)
            output.append(format(symbol_address, '016b'))
    finally:
        parser.close()

    with open(filename + '.hack', 'w') as hack_file:
        hack_file.writelines(line + '\n' for line in output)
def two_pass_assembly(self):
    """Assemble the program held by ``self.parser`` in two passes.

    First pass: every label pseudo-command is stored in a fresh symbol
    table with the ROM address of the instruction that follows it.

    Second pass: A-commands and C-commands are written to
    ``self.parser.output`` as 16-character binary lines.  A symbol that is
    not in the table is a new variable; variables get consecutive RAM
    addresses starting at 16, and the address stored in the table is the
    same one emitted for the first use.
    """
    symbol_table = SymbolTable()
    rom_address = 0
    print("starting first pass")
    while self.parser.has_more_commands():
        # NOTE(review): the result was never used; the call is kept in case
        # the parser relies on it being made -- confirm and remove.
        self.parser.get_current_command()
        if self.parser.commands.L_COMMAND == self.parser.command_type():
            print("L: " + self.parser.symbol())
            symbol_table.add_entry(self.parser.symbol(), rom_address)
        else:
            rom_address += 1
        self.parser.advance()
    self.parser.reset()

    # Variables live in RAM and are unrelated to the ROM instruction count,
    # so they need their own counter.
    ram_address = 16
    print("starting second pass")
    while self.parser.has_more_commands():
        self.parser.get_current_command()
        command_type = self.parser.command_type()
        if self.parser.commands.A_COMMAND == command_type:
            sym = self.parser.symbol()
            if sym.isdigit():
                val = sym
            elif symbol_table.contains(sym):
                val = symbol_table.get_address(sym)
            else:
                # Emit exactly the address that is stored, then move on.
                val = ram_address
                symbol_table.add_entry(sym, val)
                ram_address += 1
            self.parser.output.write(
                "0" + '{0:015b}'.format(int(val)) + "\n")
        elif self.parser.commands.C_COMMAND == command_type:
            self.parser.output.write(
                "111" + self.code.comp(self.parser.comp()) +
                self.code.dest(self.parser.dest()) +
                self.code.jump(self.parser.jump()) + "\n")
        self.parser.advance()
# Second pass over the program: A-command operands containing a letter are
# resolved through the symbol table (new ones are allocated from RAM address
# 16 upward and stored as 4-digit hex strings); C-command fields are looked
# up as binary strings.
symbol_table = SymbolTable()
binary_code = []
ram_address = 16
flag = 0
register_symble(parser, symbol_table)
parser.reset_idx()
while parser.hasMoreCommands():
    parser.advance()
    cmdtype = parser.commandType()
    if cmdtype == "A_COMMAND":
        symbol = parser.symbol()
        if re.search("[A-Za-z]", symbol) is not None:
            if not symbol_table.contains(symbol):
                address = format(ram_address, '04x')  # hexadecimal
                symbol_table.addEntry(symbol, address)
                ram_address += 1
            else:
                address = symbol_table.getAddress(symbol)  # hexadecimal
            symbol = int(address, 16)
        binary = bin(int(symbol))[2:]
        # Left-pad with zeros to a 16-character word.
        bit16 = binary.zfill(16)
    elif cmdtype == "C_COMMAND":
        dest_bin = code.dest2bin(parser.dest())
        comp_bin = code.comp2bin(parser.comp())
        jump_bin = code.jump2bin(parser.jump())
class Assembler:
    """Translate a Hack assembly file into symbol-less assembly and binary.

    For an input ``<dir>/<name>.asm`` two files are written next to it:
    ``<name>L.asm`` (the program with every symbol replaced by its address
    and labels removed) and ``<name>.hack`` (the binary translation).
    """

    def __init__(self):
        self._code = Code()
        self._lexer = Lexer()
        self._parser = Parser()
        self._symbol_table = SymbolTable()

    def assemble(self, path):
        """Assemble the file at *path*, writing both output files."""
        # Read file content
        content = self._read_file_contents(path)

        # Lex the file contents
        self._lexer.lex(content)

        # Pass the tokens to the parser
        self._parser.set_tokens(self._lexer.tokens())

        # Build symbols
        self._build_symbols()

        # Translate to symbolless asm
        self._write_symbolless_asm(path)

        # Translate binary
        self._write_binary(path)

    @staticmethod
    def _output_path(path, suffix):
        """Return the output path for *path* with ``.asm`` replaced by *suffix*.

        ``os.path.join`` is used so that an input without a directory part
        yields a relative path instead of one rooted at ``/``.
        """
        name = os.path.basename(path).rsplit('.asm', 1)[0]
        return os.path.join(os.path.dirname(path), name + suffix)

    def _write_symbolless_asm(self, path):
        """Write ``<name>L.asm``: the program with symbols resolved.

        Exits the process with status 1 if an A-command references a symbol
        whose address lookup returns ``None``.
        """
        with open(self._output_path(path, 'L.asm'), 'w') as file:
            self._parser.reset_pos()
            while self._parser.has_commands():
                command_type = self._parser.command_type()
                if command_type is Command.L_COMMAND:
                    # Labels do not appear in the symbol-less listing.
                    self._parser.advance()
                    continue
                elif command_type is Command.A_COMMAND:
                    symbol = self._parser.symbol()
                    if symbol is not None:
                        value = self._symbol_table.get_address(symbol)
                        if value is None:
                            sys.stderr.write(
                                '[Assembler]: Symbol not defined %s' % symbol)
                            # The ``with`` block closes the file on exit.
                            sys.exit(1)
                    else:
                        value = self._parser.value()
                    file.write('@%s' % value)
                elif command_type in [
                        Command.C_COMMAND_JMP, Command.C_COMMAND_COMP
                ]:
                    file.write(self._parser.text())
                file.write('\n')
                self._parser.advance()

    def _write_binary(self, path):
        """Write ``<name>.hack`` containing the binary translation."""
        binary = self._translate()
        with open(self._output_path(path, '.hack'), 'w') as file:
            file.write(binary)

    def _build_symbols(self):
        """Fill the symbol table with predefined, label and variable symbols.

        Exits the process with status 1 if a label is defined twice (or
        clashes with a predefined symbol).
        """
        symbols_count = 0
        self._init_symbol_table()

        # First pass - get label symbols
        while self._parser.has_commands():
            if self._parser.command_type() is Command.L_COMMAND:
                symbol = self._parser.symbol()
                if self._symbol_table.contains(symbol):
                    sys.stderr.write(
                        '[Assembler]: Symbol %s is used more than once' %
                        symbol)
                    sys.exit(1)
                # Labels seen so far occupy parser positions but no ROM
                # address, so they are subtracted from the position.
                address = self._parser.pos() - symbols_count
                self._symbol_table.add_entry(symbol, address)
                symbols_count += 1
            self._parser.advance()

        # Second pass - get variable symbols
        variable_address = 16
        self._parser.reset_pos()
        while self._parser.has_commands():
            if self._parser.command_type() is Command.A_COMMAND:
                symbol = self._parser.symbol()
                if symbol is not None and \
                        not self._symbol_table.contains(symbol):
                    self._symbol_table.add_entry(symbol, variable_address)
                    variable_address += 1
            self._parser.advance()

    def _translate(self):
        """Return the binary program as one string, one instruction per line.

        Exits the process with status 1 on an A-command whose symbol is not
        in the symbol table.
        """
        lines = []
        self._parser.reset_pos()
        while self._parser.has_commands():
            command_type = self._parser.command_type()
            if command_type is Command.A_COMMAND:
                symbol = self._parser.symbol()
                if symbol is not None:
                    if not self._symbol_table.contains(symbol):
                        sys.stderr.write(
                            '[Assembler]: Unknown symbol: %s' % symbol)
                        sys.exit(1)
                    address = self._symbol_table.get_address(symbol)
                    lines.append(self._to_binary(address))
                else:
                    value = self._parser.value()
                    lines.append(self._to_binary(int(value)))
            elif command_type is Command.C_COMMAND_COMP:
                comp = self._code.comp(self._parser.comp())
                dest = self._code.dest(self._parser.dest())
                lines.append('111' + comp + dest + '000')
            elif command_type is Command.C_COMMAND_JMP:
                comp = self._code.comp(self._parser.comp())
                jump = self._code.jump(self._parser.jump())
                lines.append('111' + comp + '000' + jump)
            self._parser.advance()
        # Every instruction line is newline-terminated, as before.
        return ''.join(line + '\n' for line in lines)

    def _init_symbol_table(self):
        """Reset the symbol table to the predefined symbols only."""
        self._symbol_table.clear()
        entries = {
            'SP': 0,
            'LCL': 1,
            'ARG': 2,
            'THIS': 3,
            'THAT': 4,
            'SCREEN': 16384,
            'KBD': 24576,
        }

        # R0 to R15
        for address in range(16):
            entries['R%d' % address] = address

        # Initialize symbol table with predefined symbols
        for key, value in entries.items():
            self._symbol_table.add_entry(key, value)

    def _read_file_contents(self, path):
        """Return the full text of the file at *path*."""
        with open(path, 'r') as file:
            return file.read()

    def _to_binary(self, value):
        """Return integer *value* as a zero-padded 16-digit binary string."""
        return format(value, '016b')
class Parser(object):
    """Load a Hack assembly file and expose its commands one at a time.

    On construction the file is read, labels are recorded in the symbol
    table, and every symbolic A-command is rewritten in place to
    ``@<address>``.  Afterwards the commands can be walked with
    ``hasMoreCommands`` / ``advance``.
    """

    A_CMD = 'A_COMMAND'
    C_CMD = 'C_COMMAND'
    L_CMD = 'L_COMMAND'

    def __init__(self, file_name):
        self.lines = []
        self.current_position = 0
        # Set before any pass runs so commandType() raises its own error
        # instead of an AttributeError when called before advance().
        self.current_command = None
        self.symbol_table = SymbolTable()
        self.readInFile(file_name)
        self.addLabelsToSymbolTable()
        self.substituteVars()

    def readInFile(self, file_name):
        """Read *file_name*, keeping the first token of each code line."""
        with open(file_name, "r") as fp:
            for line in fp:
                # Drop comments (whole-line or trailing) and blank lines.
                code = line.split("//", 1)[0].strip()
                if not code:
                    continue
                self.lines.append(code.split()[0])

    def addLabelsToSymbolTable(self):
        """Record each label with the address of the next real instruction."""
        rom_address = 0
        while self.hasMoreCommands():
            self.advance()
            if self.commandType() == Parser.L_CMD:
                self.symbol_table.addEntry(self.symbol(), rom_address)
            else:
                rom_address += 1
        # reset current_position after reading
        self.current_position = 0

    def substituteVars(self):
        """Replace every symbolic A-command with ``@<address>`` in place."""
        while self.hasMoreCommands():
            self.advance()
            if self.commandType() != Parser.A_CMD:
                continue
            var = self.symbol()
            if var.isdigit():
                continue
            if not self.symbol_table.contains(var):
                self.symbol_table.addEntry(var)
            # advance() already moved past the command being rewritten.
            self.lines[self.current_position - 1] = "@{}".format(
                self.symbol_table.getAddress(var))
        # reset current_position after reading
        self.current_position = 0

    def commandType(self):
        """Return ``A_CMD``, ``L_CMD`` or ``C_CMD`` for the current command.

        Raises:
            Exception: if there is no current command.
        """
        if not self.current_command:
            raise Exception("current_command is empty")
        if self.current_command.startswith("@"):
            return self.A_CMD
        if self.current_command.startswith("("):
            return self.L_CMD
        return self.C_CMD

    def hasMoreCommands(self):
        """Return True while unread commands remain."""
        return self.current_position < len(self.lines)

    def advance(self):
        """Make the next command the current one."""
        self.current_command = self.lines[self.current_position]
        self.current_position += 1

    def symbol(self):
        """Return the current command without ``@``, ``(`` or ``)``."""
        return self.current_command.strip("@()")

    def dest(self):
        """Return the dest mnemonic, or None when the command has no ``=``."""
        parts = self.current_command.split("=")
        if len(parts) == 2:
            return parts[0]

    def comp(self):
        """Return the comp mnemonic of the current C-command.

        The dest part (up to ``=``) and the jump part (from ``;``) are both
        removed, so ``D=M;JMP`` yields ``M`` and a comp-only command yields
        the command itself.
        """
        command = self.current_command
        if "=" in command:
            command = command.split("=", 1)[1]
        return command.split(";", 1)[0]

    def jump(self):
        """Return the jump mnemonic, or None when the command has no ``;``."""
        parts = self.current_command.split(";")
        if len(parts) == 2:
            return parts[1]
class Assembler:
    """Two-pass assembler writing a ``.hack`` file next to the ``.asm`` input."""

    def __init__(self, filename):
        self.asm_filename = filename
        # Swaps the last three characters ("asm") for "hack".
        self.hack_filename = self.asm_filename[:-3] + 'hack'
        self.outfile = open(self.hack_filename, 'w')
        self.parser = Parser(self.asm_filename)
        self.code = Code()
        self.symbol_table = SymbolTable()
        self.next_available_address = 16

    def translate(self):
        """Translates all the commands in the .asm file, writing the
        resulting binary code to the .hack file.

        The output file is closed even if one of the passes raises.
        """
        try:
            self.first_pass()
            self.parser.reset()
            self.second_pass()
        finally:
            self.outfile.close()

    def first_pass(self):
        """Goes through the input file, adding all the labels to the
        symbol table.
        """
        label = self.parser.first_label()
        while not self.parser.is_done():
            self.symbol_table.add_entry(label, self.parser.label_address())
            label = self.parser.next_label()

    def second_pass(self):
        """Goes through the input file for a second time, translating the
        commands into binary, and adding symbols to the symbol table as
        required.
        """
        while not self.parser.is_done():
            self.process_command()
            self.parser.next_command()

    def process_command(self):
        """Translates the current command of the parser, writing the
        corresponding binary command to the output file (including a
        newline character).  Labels produce no output.
        """
        command_type = self.parser.command_type()
        if command_type == 'C_COMMAND':
            self.process_c_command()
        elif command_type == 'A_COMMAND':
            self.process_a_command()
        # Anything else is a label: nothing to write.

    def process_c_command(self):
        """Translates the current C-command of the parser, writing the
        corresponding binary command to the output file (including a
        newline character).
        """
        dest_binary = self.code.dest(self.parser.dest)
        comp_binary = self.code.comp(self.parser.comp)
        jump_binary = self.code.jump(self.parser.jump)
        binary_command = '111' + comp_binary + dest_binary + jump_binary
        self.outfile.write(binary_command + '\n')

    def process_a_command(self):
        """Translates the current A-command of the parser, writing the
        corresponding binary command to the output file (including a
        newline character).

        A decimal operand is used directly; a known symbol is looked up;
        an unknown symbol is stored at the next available address
        (starting at 16).
        """
        symbol = self.parser.symbol
        # isdecimal() is true only for strings int() can parse, unlike
        # isnumeric(), which also accepts characters such as fractions.
        if symbol.isdecimal():
            address = int(symbol)
        elif self.symbol_table.contains(symbol):
            address = self.symbol_table.get_address(symbol)
        else:
            address = self.next_available_address
            self.symbol_table.add_entry(symbol, address)
            self.next_available_address += 1
        binary_command = self.binary_number(address)
        self.outfile.write(binary_command + '\n')

    def binary_number(self, symbol):
        """Takes a decimal number (an int or a string of digits) and
        converts it to a 16-bit binary number in the form of a string.
        """
        binary = bin(int(symbol))
        return binary[2:].zfill(16)
class Parser():
    """Parser: Encapsulates access to the input code.

    Reads an assembly language command, parses it, and provides convenient
    access to the command's components (fields and symbols).  In addition,
    removes all white space and comments.
    """

    def __init__(self, in_file):
        """Remember the input file and create the translation helpers."""
        self.in_file = in_file
        self.code2bin = Code2Bin()
        self.symb_table = SymbolTable()

    def read_in_file(self):
        """First pass: buffer every code line and record label addresses."""
        self.code_contents = []
        ROM_address = 0
        with open(self.in_file, 'r', encoding='utf_8') as inf:
            for line in inf:
                command = self.process(line)
                # Lines that are empty after processing carry no code.
                if not command:
                    continue
                self.code_contents.append(command)
                if self.command_type(command) == 'L_COMMAND':
                    # A label names the address of the next instruction.
                    symbol = self.get_symbol(command)
                    self.symb_table.add_entry(symbol, ROM_address)
                else:
                    ROM_address += 1

    def process(self, line):
        """Return *line* without its comment and surrounding white space."""
        return line.split('//')[0].strip()

    def command_type(self, command):
        """Return the type of *command*.

        A_COMMAND for @Xxx where Xxx is either a symbol or a decimal number
        C_COMMAND for dest=comp;jump (either the dest or jump fields may be
                  empty; if dest is empty, the "=" is omitted; if jump is
                  empty, the ";" is omitted)
        L_COMMAND (pseudo-command) for (Xxx) where Xxx is a symbol

        Returns None when the command matches none of these.
        """
        if '@' in command:
            return 'A_COMMAND'
        elif '=' in command or ';' in command:
            return 'C_COMMAND'
        elif '(' in command and ')' in command:
            return 'L_COMMAND'

    def get_symbol(self, command):
        """Return the symbol or decimal Xxx of the command @Xxx or (Xxx).

        Should be called only when command_type() is A_COMMAND or L_COMMAND.
        """
        return command.replace('@', '').replace('(', '').replace(')', '')

    def get_dest_comp_jump(self, command):
        """Return the (dest, comp, jump) mnemonics of a C-command.

        A missing dest or jump field is reported as the string 'null'.
        Should be called only when command_type() is C_COMMAND.
        """
        dest, equals, rest = command.partition('=')
        if not equals:
            # No '=': the whole text is comp[;jump] and dest is empty.
            dest, rest = 'null', command
        comp, semicolon, jump = rest.partition(';')
        if not semicolon:
            jump = 'null'
        return dest, comp, jump

    def translate(self):
        """Second pass: translate code_contents into binary lines."""
        self.out_binarys = []
        available_RAM_address = 16
        for command in self.code_contents:
            cmd_type = self.command_type(command)
            if cmd_type == 'A_COMMAND':
                symbol = self.get_symbol(command)
                if symbol.isdigit():
                    address = int(symbol)
                elif self.symb_table.contains(symbol):
                    address = int(self.symb_table.get_address(symbol))
                else:
                    # New variable: allocate the next free RAM address.
                    address = available_RAM_address
                    self.symb_table.add_entry(symbol, address)
                    available_RAM_address += 1
                self.out_binarys.append('0{:015b}'.format(address))
            elif cmd_type == 'C_COMMAND':
                dest, comp, jump = self.get_dest_comp_jump(command)
                dest_binary = self.code2bin.dest2bin(dest)
                comp_binary = self.code2bin.comp2bin(comp)
                jump_binary = self.code2bin.jump2bin(jump)
                self.out_binarys.append(
                    '111' + comp_binary + dest_binary + jump_binary)

    def _hack_file_name(self):
        """Return the input path with its extension replaced by ``.hack``.

        Only the extension is touched; a plain ``replace('asm', 'hack')``
        would also rewrite directory names containing "asm" and would
        return the input path unchanged when it has no "asm" in it.
        """
        # Function-scope import keeps this class self-contained.
        import os
        return os.path.splitext(self.in_file)[0] + '.hack'

    def write_out_binarys(self):
        """Write the translated lines to the ``.hack`` file."""
        out_file = self._hack_file_name()
        with open(out_file, 'w', encoding='utf_8') as outf:
            for binary in self.out_binarys:
                outf.write(binary + '\n')
        print(out_file, 'finished assembling.')

    def parse(self):
        """Run both passes and write the output file."""
        self.read_in_file()
        self.translate()
        self.write_out_binarys()
class AssmParser(AssmCommandType, Parser):
    """Parser that also encodes the current command as a binary string.

    The field accessors (``symbol``, ``dest``, ``comp``, ``jump``) do not
    return anything apart from ``symbol``; they store their result on the
    instance (``current_symbol``, ``current_dest``, ``current_comp`` /
    ``current_a``, ``current_jump``).  The ``binarize_*`` methods store the
    encoded instruction in ``bin_current``.
    """

    def __init__(self, file_name):
        """Open the input file/stream and get ready to parse it."""
        super(AssmParser, self).__init__(file_name)
        self.RAM = 16  # next address handed out to a new symbol
        self.symbol_table = SymbolTable(self.buff)
        self.symbol_table.find_symbols()

    def symbol(self):
        """Store the symbol or decimal Xxx of the current command @Xxx or
        (Xxx) in ``self.current_symbol`` and return it.

        Should be called only when the command is an A_COMMAND or L_COMMAND.
        """
        command = self.current_command
        if command.startswith('(') and command.endswith(')'):
            # (Xxx): drop both parentheses, not just the leading one.
            self.current_symbol = command[1:-1]
        else:
            self.current_symbol = command[1:]
        return self.current_symbol

    def dest(self):
        """Encode the dest mnemonic of the current C_COMMAND into
        ``self.current_dest``; a command without ``=`` uses 'null'.
        """
        if '=' in self.current_command:
            d = self.current_command.split('=')[0]
        else:
            d = 'null'
        code = Code()
        self.current_dest = code.dest(d)

    def comp(self):
        """Encode the comp mnemonic of the current C_COMMAND into
        ``self.current_comp`` and ``self.current_a``.
        """
        if '=' in self.current_command:
            c = self.current_command.split('=')[1]
        elif ';' in self.current_command:
            c = self.current_command.split(';')[0]
        else:
            # Neither dest nor jump present: the command is the comp field.
            # Previously ``c`` was left unbound here.
            c = self.current_command
        code = Code()
        self.current_comp, self.current_a = code.comp(c)

    def jump(self):
        """Encode the jump mnemonic of the current C_COMMAND into
        ``self.current_jump``.

        A command with ``=`` but no ``;`` uses 'null'; a command with
        neither passes ``None`` to ``Code.jump``.
        """
        if ';' in self.current_command:
            j = self.current_command.split(';')[1]
        elif '=' in self.current_command:
            j = 'null'
        else:
            j = None
        code = Code()
        self.current_jump = code.jump(j)

    def __repr__(self):
        return self.asmfile + '\n'.join(self.buff)

    @staticmethod
    def _to_word(address):
        """Return *address* as a binary string left-padded to 16 digits."""
        return bin(address)[2:].zfill(16)

    def binarize_c_command(self):
        """Encode the current C-command into ``self.bin_current``."""
        self.comp()
        self.dest()
        self.jump()
        self.bin_current = ('111' + self.current_a + self.current_comp +
                            self.current_dest + self.current_jump)

    def binarize_a_symbol(self):
        """Encode a symbolic A-command into ``self.bin_current``.

        The symbol table is keyed by the full command text here; a key
        that is not present is added at the next free RAM address.
        """
        if not self.symbol_table.contains(self.current_command):
            self.symbol_table.addEntry(self.current_command, self.RAM)
            self.RAM = self.RAM + 1
        address = self.symbol_table.get_address(self.current_command)
        self.bin_current = self._to_word(address)

    def binarize_a_address(self):
        """Encode a numeric A-command (@123) into ``self.bin_current``."""
        address = int(self.current_command[1:])
        self.bin_current = self._to_word(address)

    def binarize_a_command(self):
        """Encode the current A-command, numeric or symbolic."""
        if re.match(r'^@[0-9].*$', self.current_command):
            self.binarize_a_address()
        else:
            self.binarize_a_symbol()
class AssemblerSymb:
    """Hack assembler with symbol support, writing ``<name>2.hack``.

    Intended call order: ``firstPass`` (labels), ``secondPass``
    (variables), ``createOutput`` (binary output; also closes the file).
    """

    def __init__(self, path):
        self.parser = Parser(path)
        self.code = Code()
        self.symb_table = SymbolTable()
        self.file = open(self._output_path(path), 'w')

    @staticmethod
    def _output_path(path):
        """Return *path* with its extension replaced by ``2.hack``.

        Locating the first '/' and '.' by hand produced wrong names for
        paths without a slash and for paths such as ``./prog.asm``.
        """
        # Function-scope import keeps this class self-contained.
        import os
        return os.path.splitext(path)[0] + '2.hack'

    def binary(self, s):
        """Return the binary representation (no padding) of decimal *s*."""
        return "{0:b}".format(int(s))

    def firstPass(self):
        """Record each label with the address of the next instruction.

        Raises:
            ValueError: if the parser reports an unknown command type.
        """
        counter = 0
        while self.parser.hasMoreCommands():
            self.parser.advance()
            command_type = self.parser.commandType()
            if command_type in ['A_COMMAND', 'C_COMMAND']:
                counter += 1
            elif command_type == 'L_COMMAND':
                symbol = self.parser.symbol()
                self.symb_table.addEntry(symbol, counter)
            else:
                raise ValueError("Unexpected command type encountered")

    def secondPass(self):
        """Allocate RAM addresses (from 16 up) to every new variable."""
        ram_address = 16
        self.parser.i = -1  # rewind the parser
        while self.parser.hasMoreCommands():
            self.parser.advance()
            if self.parser.commandType() != 'A_COMMAND':
                continue
            symbol = self.parser.symbol()
            if not symbol.isdigit() and not self.symb_table.contains(symbol):
                self.symb_table.addEntry(symbol, ram_address)
                ram_address += 1

    def createOutput(self):
        """Write the binary program and close the output file.

        The file is closed even if translation raises.
        """
        self.parser.i = -1  # rewind the parser
        try:
            while self.parser.hasMoreCommands():
                self.parser.advance()
                command_type = self.parser.commandType()
                if command_type == 'A_COMMAND':
                    symbol = self.parser.symbol()
                    if symbol.isdigit():
                        bin_symbol = self.binary(symbol)
                    else:
                        symb_add = self.symb_table.getAddress(symbol)
                        bin_symbol = self.binary(symb_add)
                    # Left-pad to a 16-character word.
                    self.file.write(bin_symbol.zfill(16) + '\n')
                elif command_type == 'C_COMMAND':
                    dest = self.code.dest(self.parser.dest())
                    comp = self.code.comp(self.parser.comp())
                    jump = self.code.jump(self.parser.jump())
                    self.file.write('111' + comp + dest + jump + '\n')
                # Labels produce no output.
        finally:
            self.file.close()
class EmbindGenerator(object):
    """Generate embind C++ bindings and JS overload selectors.

    ``output(name)`` writes ``name.cpp`` (the embind registrations) and
    ``name.js`` (selector functions that dispatch an overloaded call to
    the uniquely named binding, plus the type-check helpers they use).

    NOTE(review): the emitted C++/JS text was reconstructed from a source
    whose line breaks were lost, and lines that the old code prefixed with
    ``#`` are emitted with ``//`` here because ``#`` is not a C++ comment
    marker.  Confirm the exact layout against a known-good output.
    """

    def __init__(self, symbol_dir, classes):
        self.symbol_table = SymbolTable()
        self.symbol_table.LoadSymbolsFromDirectory(symbol_dir)
        self.classes = classes
        self.known_symbols = [
            '',  # Typeless 'types', e.g. return value of ctor is parsed to an empty string.
            'void',
            'bool',
            'char',
            'signed char',
            'unsigned char',
            'short',
            'signed short',
            'unsigned short',
            'int',
            'signed int',
            'unsigned int',
            'long',
            'signed long',
            'unsigned long',
            'float',
            'double',
            'unsigned int',
            'std::string',
            'emscripten::val',
        ]
        self.generated_function_selectors = []
        # Lists the types for which we need to generate type identifier functions for.
        self.require_type_function = []

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _sanitize_type_name(type_name):
        """Return *type_name* with ':', '<' and '>' replaced by '_'."""
        return type_name.replace(':', '_').replace('<', '_').replace('>', '_')

    @staticmethod
    def _ctor_function_name(class_name, ctor):
        """Return the free-function name used to expose *ctor*."""
        return '_'.join([class_name] +
                        [p.BasicType() for p in ctor.parameters])

    @staticmethod
    def _type_id(index):
        """Return the TypeId for the type at *index* in require_type_function.

        Both the assignment and the check must use this so they agree.
        """
        return index + 1

    def _good_ctors(self, class_symbol):
        """Return the constructors of *class_symbol* that take parameters,
        all of whose parameter types are known.

        0-parameter ctors are created with 'new type();' and are skipped.
        """
        ctors = []
        for f in class_symbol.children:
            if f.name != class_symbol.name or not len(f.parameters):
                continue
            if all(p.BasicType() in self.known_symbols
                   for p in f.parameters):
                ctors.append(f)
        return ctors

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------

    def output(self, output):
        """Write ``<output>.cpp`` and ``<output>.js``."""
        self.cpp_out = open('{}.cpp'.format(output), 'w')
        self.js_out = open('{}.js'.format(output), 'w')
        try:
            self.cpp_out.write(
                '#ifdef EMSCRIPTEN\n'
                '\n'
                '#include <emscripten/bind.h>\n'
                'using namespace emscripten;\n'
                '\n'
                '#include "embind_prologue.h" // Implement this file and all'
                ' required code and header files to compile this file here.\n'
                '\n')
            self.js_out.write('function RegisterFunctionSelectors() {\n')
            for c in self.classes:
                self.GenerateCtorFunctions(c)
            # TODO:: if we create a separate bindings file for each, generate a unique name here.
            self.cpp_out.write('EMSCRIPTEN_BINDINGS(bindings) {\n\n')
            for c in self.classes:
                self.WriteForwardDeclaration(c)
            for c in self.classes:
                self.GenerateEmbindFile(c, self.known_symbols)
            self.cpp_out.write("\n}\n\n")
            self.cpp_out.write("#endif\n")
            self.cpp_out.close()
            print("Writing embind_symbols.cpp done.")
            self.GenerateTypeIdAssignments()
            self.js_out.write("}\n")
            self.js_out.write(
                "window['RegisterFunctionSelectors'] = RegisterFunctionSelectors;\n"
            )
            self.GenerateIsOfTypeFunctions()
            self.js_out.close()
            print("Writing embind_symbols.js done.")
        finally:
            # Closing an already-closed file is a no-op.
            self.cpp_out.close()
            self.js_out.close()

    def WriteForwardDeclaration(self, class_name):
        """Placeholder: forward declarations are currently not emitted."""
        pass

    # ------------------------------------------------------------------
    # Symbol filtering
    # ------------------------------------------------------------------

    def IsGoodSymbol(self, s):
        """Return True if *s* can be exposed: it is not marked 'noembind'
        and its type and all parameter types are known.
        """
        if "noembind" in s.attributes or s.type not in self.known_symbols:
            return False
        for p in s.parameters:
            if p.BasicType() not in self.known_symbols:
                return False
        return True

    def FetchFunctionOverloads(self, function):
        """Return the exposable siblings sharing *function*'s name, ordered
        by descending parameter count.
        """
        functionOverloads = [
            s for s in function.parent.children
            if s.name == function.name and self.IsGoodSymbol(s)
        ]
        functionOverloads.sort(key=lambda f: len(f.parameters), reverse=True)
        return functionOverloads

    # ------------------------------------------------------------------
    # JavaScript side
    # ------------------------------------------------------------------

    def GenerateTypeIdAssignments(self):
        """Write a ``TypeId`` assignment for every required non-number type."""
        for n, _type in enumerate(self.require_type_function):
            if _type in ["float", "int"]:
                continue
            self.js_out.write(
                ' Module.{0}.prototype.TypeId = {1}; /* Magic automatically generated TypeId number for {0} */\n'
                .format(_type, self._type_id(n)))

    def GenerateIsOfTypeFunctions(self):
        """Write ``isNumber`` and one ``IsOfType_<type>`` per required type."""
        self.js_out.write(
            'function isNumber(value) {\n'
            '    if ((undefined === value) || (null === value)) {\n'
            '        return false;\n'
            '    }\n'
            "    if (typeof value == 'number') {\n"
            '        return true;\n'
            '    }\n'
            '    return !isNaN(value - 0);\n'
            '}\n')
        for n, _type in enumerate(self.require_type_function):
            if _type in ['float', 'int']:
                self.js_out.write(
                    'function IsOfType_{}(obj) {{ return isNumber(obj); }}\n'.
                    format(_type))
            else:
                # Must test the same id GenerateTypeIdAssignments assigned.
                self.js_out.write(
                    '/* Magic automatically generated TypeId number for {0} */\n'
                    'function IsOfType_{0}(obj) {{ return obj != undefined && obj != null && obj.TypeId == {1}; }}\n'
                    .format(_type, self._type_id(n)))

    def GenerateFunctionSelector(self, functionOverloads):
        """Write a JS function that picks an overload from its arguments.

        Overloads are tried from most to fewest parameters.  Two adjacent
        overloads with different arity are told apart by whether the last
        argument is defined; with equal arity each argument is type-checked
        via ``IsOfType_*`` (and the type is queued in
        ``require_type_function``).  Sorts *functionOverloads* in place.
        """
        functionOverloads.sort(key=lambda f: len(f.parameters), reverse=True)
        function = functionOverloads[0]
        selector_key = function.parent.name + "::" + function.name
        if selector_key in self.generated_function_selectors:
            return
        self.generated_function_selectors.append(selector_key)
        if len(function.parameters) == 0:
            return  # TODO: Add support for REAL ctors.
        isCtor = (function.name == function.parent.name)
        if isCtor:
            header = '{}_ = function('.format(function.name)
        else:
            header = '{}.prototype.{} = function('.format(
                function.parent.name, function.name)
        arg_names = ', '.join(
            'arg{}'.format(i + 1) for i in range(len(function.parameters)))
        self.js_out.write(header + arg_names + ') {\n')

        last_index = len(functionOverloads) - 1
        for i, thisFunc in enumerate(functionOverloads):
            nextFunc = functionOverloads[i + 1] if i < last_index else None
            self.js_out.write(" ")
            if i:
                self.js_out.write("else ")
            if nextFunc:
                if len(thisFunc.parameters) != len(nextFunc.parameters):
                    self.js_out.write("if (arg{} != undefined)".format(
                        len(thisFunc.parameters)))
                else:
                    self.js_out.write("if (")
                    for j, param in enumerate(thisFunc.parameters):
                        _type = param.BasicType()
                        if j:
                            self.js_out.write(" && ")
                        self.js_out.write("IsOfType_{}(arg{})".format(
                            _type, j + 1))
                        if _type not in self.require_type_function:
                            self.require_type_function.append(_type)
                    self.js_out.write(")")
            self.js_out.write("\n")
            self.js_out.write(
                ' {}'.format('return ' if thisFunc.type != 'void' else ''))
            self.js_out.write('{}.{}'.format('Module' if isCtor else 'this',
                                             function.name))
            for p in thisFunc.parameters:
                self.js_out.write(
                    "_" + self._sanitize_type_name(p.BasicType()))
            paramList = [
                'arg{}'.format(j + 1)
                for j in range(len(thisFunc.parameters))
            ]
            self.js_out.write('({});\n'.format(', '.join(paramList)))
        self.js_out.write('}\n')

    # ------------------------------------------------------------------
    # C++ side
    # ------------------------------------------------------------------

    def GenerateEmbindFile(self, class_name, known_symbols=None):
        """Write the ``class_<...>`` registration for *class_name*.

        Args:
            class_name: name of the class to expose.
            known_symbols: type names embind can handle; defaults to
                ``self.known_symbols``.

        A member that cannot be exposed is still written, but prefixed
        with ``// /*reason*/`` on the same line so it is commented out.
        """
        if known_symbols is None:
            known_symbols = self.known_symbols
        if not self.symbol_table.contains(class_name):
            print(
                "Error: Cannot generate bindings for class '{}', XML for that class doesn't exist!"
                .format(class_name))
            return
        code = [
            '//#include <emscripten/bind.h>',
            '//using namespace emscripten;',
            '',
            '//#include "embind_prologue.h" // Implement this file and all required code and header files to compile this file here.',
            '',
            # Literal brace doubled: a single '{' makes str.format raise.
            '//EMSCRIPTEN_BINDINGS({0}) {{'.format(class_name),
            'class_<{0}>("{0}")'.format(class_name),
        ]
        hasCtorExposed = False  # Embind only supports exposing one ctor, so pick the first one.
        s = self.symbol_table.symbol(class_name)
        for f in s.children:
            if f.visibilityLevel != VisibilityLevel.Public:
                continue  # Only public functions and members are exported.
            if f.kind == "function" and not f.name.startswith("operator"):
                functionOverloads = self.FetchFunctionOverloads(f)
                # If True, this symbol is exposed. If False, it is not enabled for JS.
                good_symbol = "noembind" not in f.attributes
                reasons = [
                    '' if good_symbol else
                    '(ignoring since [noembind] specified)'
                ]
                isCtor = (f.name == class_name)
                if good_symbol and f.type not in known_symbols:
                    good_symbol = False
                    reasons.append(
                        '({} is not known to embind)'.format(f.type))
                hasOverloads = len(functionOverloads) > 1
                # The JS side name with which this function will be exposed.
                targetFunctionName = [f.name]
                paramList = []
                for p in f.parameters:
                    paramList.append(p.type)
                    if good_symbol and p.BasicType() not in known_symbols:
                        good_symbol = False
                        reasons.append('({} is not known to embind)'.format(
                            p.BasicType()))
                    if hasOverloads:
                        targetFunctionName.append('_{}'.format(
                            self._sanitize_type_name(p.BasicType())))
                funcPtrType = '{}({}*)({}){}'.format(
                    f.type, '' if f.isStatic else '{}::'.format(class_name),
                    ','.join(paramList), ' const' if f.isConst else '')
                # TODO: Remove this line once multiple ctors is supported!
                if good_symbol and isCtor and hasCtorExposed:
                    good_symbol = False
                    reasons = [
                        "(Multiple constructors not yet supported by embind!)"
                    ]
                prefix = ''
                if not good_symbol:
                    prefix = '// /*{}*/'.format(''.join(reasons))
                if isCtor:
                    code.append(prefix + ' .constructor<{}>()\n'.format(
                        ','.join(paramList)))
                    if good_symbol:
                        hasCtorExposed = True
                else:
                    binder = ' .class_function(' if f.isStatic else ' .function('
                    code.append(prefix + binder +
                                '"{}", ({})&{}::{})'.format(
                                    ''.join(targetFunctionName), funcPtrType,
                                    class_name, f.name))
                if hasOverloads and good_symbol:
                    self.GenerateFunctionSelector(functionOverloads)
            elif f.kind == "variable":
                if f.type not in known_symbols:
                    prefix = '// /* {} is not known to embind. */'.format(
                        f.type)
                elif f.IsArray():
                    prefix = '// /* Exposing array types as fields are not supported by embind. */'
                elif f.isStatic:
                    prefix = '// /* Exposing static class variables not yet implemented (are they supported?) */'
                else:
                    prefix = ''
                code.append(prefix + ' .property("{0}", &{1}::{0})'.format(
                    f.name, class_name))
        code.append(" ;")
        code.append("//}")
        self.RegisterCtorFunctions(class_name)
        self.cpp_out.write('\n'.join(code))

    def GenerateCtorFunctions(self, class_name):
        """Write a C++ factory function for each exposable constructor and,
        when there are several, a JS selector that dispatches between them.
        """
        s = self.symbol_table.symbol(class_name)
        ctors = self._good_ctors(s)
        for f in ctors:
            self.cpp_out.write('{} {}'.format(
                class_name, self._ctor_function_name(class_name, f)))
            self.cpp_out.write('{} {{ return {}{}; }}'.format(
                f.ArgStringWithTypes(), class_name,
                f.ArgStringWithoutTypes()))
            self.cpp_out.write('\n')
        # NOTE(review): emitted once per class; the collapsed original does
        # not show whether this sat inside the constructor loop -- confirm.
        self.js_out.write('window["{0}"] = Module.{0};\n'.format(class_name))
        if len(ctors) > 1:
            self.GenerateFunctionSelector(ctors)
            self.js_out.write(
                'window["{0}_"] = Module.{0}_;\n'.format(class_name))

    def RegisterCtorFunctions(self, class_name):
        """Write a ``function("...", &...)`` registration for each factory
        function produced by GenerateCtorFunctions.
        """
        s = self.symbol_table.symbol(class_name)
        for f in self._good_ctors(s):
            t = self._ctor_function_name(class_name, f)
            self.cpp_out.write('function("{0}", &{0});\n'.format(t))
def main():
    """Assemble a Hack ``.asm`` file into a ``.hack`` binary and an ``.ann`` listing.

    The source path is taken from the first command-line argument (via
    ``Util.getCommandLineArg``) and resolved against the current directory.

    Pass 1 walks the program and records every label (``L_COMMAND``) with the
    ROM address of the instruction that follows it.  Pass 2 emits one 16-bit
    word per A-/C-command into the ``.hack`` file and, for every command, one
    line into the ``.ann`` file showing the original text, the command with
    symbols resolved, and the machine word split into nibbles.  The symbol
    table is appended to the ``.ann`` file as comments.
    """
    filename = os.path.join(os.getcwd(), Util.getCommandLineArg(1))
    first_parser = Parser(filename)
    second_parser = Parser(filename)
    symbol_table = SymbolTable()

    # Swap only the extension.  str.replace('asm', ...) would also rewrite any
    # "asm" in the directory part of the path, and would hand back the input
    # path unchanged (so the source gets overwritten) when the name contains
    # no "asm" at all.
    base_name = os.path.splitext(filename)[0]
    hack_filename = base_name + '.hack'
    ann_filename = base_name + '.ann'

    # ---- Pass 1: bind labels to ROM addresses -----------------------------
    rom_address = 0
    while first_parser.has_more_commands():
        first_parser.advance()
        # Strings are compared with ==; `is` only worked by accident of
        # interning and raises a SyntaxWarning on modern interpreters.
        command_type = first_parser.command_type()
        if command_type == 'A_COMMAND' or command_type == 'C_COMMAND':
            rom_address += 1
        elif command_type == 'L_COMMAND':
            symbol_table.add_entry(first_parser.symbol(), rom_address, 'LAB')

    # ---- Pass 2: emit machine code and the annotated listing --------------
    ram_address = 16  # first RAM slot handed out to a new variable
    with open(hack_filename, 'w') as hack_file, \
            open(ann_filename, 'w') as ann_file:
        while second_parser.has_more_commands():
            second_parser.advance()
            command_type = second_parser.command_type()
            machine_command = ''

            if command_type == 'A_COMMAND':
                symbol = second_parser.symbol()
                if symbol[0].isdigit():
                    binary = symbol
                elif symbol_table.contains(symbol):
                    binary = symbol_table.get_address(symbol)
                else:
                    # First sighting of a variable: give it the next RAM slot.
                    binary = ram_address
                    symbol_table.add_entry(symbol, ram_address, 'VAR')
                    ram_address += 1
                machine_command = '{0:016b}\n'.format(int(binary))
                hack_file.write(machine_command)
            elif command_type == 'C_COMMAND':
                dest = Code.dest(second_parser.dest())
                comp = Code.comp(second_parser.comp())
                jump = Code.jump(second_parser.jump())
                machine_command = '111{0}{1}{2}\n'.format(comp, dest, jump)
                hack_file.write(machine_command)

            # Labels produce no machine word, so `mc` is empty for them and
            # the nibble columns below come out blank.
            assembly = second_parser.original_command().strip()
            mc = machine_command.strip()
            annotated_machine = '{} {} {} {}'.format(
                mc[0:4], mc[4:8], mc[8:12], mc[12:16])

            if command_type == 'L_COMMAND':
                symbolless_command = symbol_table.get_address(
                    second_parser.symbol())
            elif (command_type == 'A_COMMAND'
                  and not second_parser.symbol().isdigit()):
                symbolless_command = '@{}'.format(
                    symbol_table.get_address(second_parser.symbol()))
            else:
                symbolless_command = second_parser.command

            annotated_command = '{:<39} {} {:<11} {}\n'.format(
                assembly, '//' if command_type else '', symbolless_command,
                annotated_machine)
            ann_file.write(annotated_command)

        ann_file.write('\n// Symbol Table:\n')
        # Each table value is indexed as (address, kind) here.
        for symbol, address in symbol_table.symbol_table.items():
            ann_file.write('// {}: {:<30} -> {}\n'.format(
                address[1], symbol, address[0]))
def main():
    """Translate the ``.asm`` file named on the command line into a ``.hack`` file.

    Exactly one argument ending in ``.asm`` is required; otherwise an error is
    printed and the process exits with status 1.  The program is read twice:
    the first pass records label addresses, the second pass translates A- and
    C-commands into 16-character binary strings.  The result is written next
    to the input with the extension replaced by ``.hack`` and the process
    exits with status 0.
    """
    # If there is an invalid number of arguments the program stops.
    if len(sys.argv) != 2:
        print("ERROR: Invalid number of arguments. Expected: file_name.asm ")
        sys.exit(1)
    # The assembler only accepts asm files to be translated into hack files.
    elif sys.argv[1][-4:] != ".asm":
        print("ERROR: Invalid file type. Expected: asm file")
        sys.exit(1)

    input_file = sys.argv[1]

    symbol_table = SymbolTable()
    translator_c_command = Code()

    # Counters to keep track of the ROM and RAM memory address.
    count_ROM = 0
    count_variable = 16

    # Translated commands, in program order.
    commands_translation = []

    # ---- First pass: labels -----------------------------------------------
    parser = Parser(input_file)
    while parser.has_more_commands():
        parser.advance()
        if parser.command_type() == "L_COMMAND":
            label = parser.symbol()
            # A label must not start with a digit.
            if not label[0].isdigit():
                symbol_table.add_entry(label, count_ROM)
            else:
                print("ERROR: invalid label indentifier")
                sys.exit(1)
        else:
            # Every non-label command occupies one ROM word.
            count_ROM += 1

    # Rewind the parser's file so the second pass starts from the top.
    parser.file.seek(0)

    # ---- Second pass: translation -----------------------------------------
    while parser.has_more_commands():
        parser.advance()
        command_type = parser.command_type()

        if command_type == "A_COMMAND":
            variable = parser.symbol()
            if not variable[0].isdigit():
                # Symbolic operand: allocate a RAM slot the first time the
                # name is seen, otherwise reuse the recorded address.
                if not symbol_table.contains(variable):
                    symbol_table.add_entry(variable, count_variable)
                    address = count_variable
                    count_variable += 1
                else:
                    address = symbol_table.get_address(variable)
                commands_translation.append("{:016b}".format(address))
            elif variable.isdigit():
                # Numeric operand.
                commands_translation.append("{:016b}".format(int(variable)))
            else:
                # Starts with a digit but is not a number (e.g. "1abc").
                print("ERROR: The symbol " + variable + " is invalid")
                sys.exit(1)

        elif command_type == "C_COMMAND":
            binary_dest = translator_c_command.dest(parser.dest())
            binary_comp = translator_c_command.comp(parser.comp())
            binary_jump = translator_c_command.jump(parser.jump())
            commands_translation.append(
                "111" + binary_comp + binary_dest + binary_jump)

    # The name was validated to end in ".asm", so strip exactly that suffix.
    # Cutting at the *first* dot turned "./prog.asm" into ".hack" and
    # "../dir/prog.asm" into ".hack" as well.
    hack_file = input_file[:-len(".asm")] + ".hack"

    # Context manager guarantees the file is closed even if a write fails.
    with open(hack_file, "w") as output:
        for command in commands_translation:
            output.write(command)
            output.write("\n")

    sys.exit(0)
class HackAssembler:
    """Translate a Hack ``.asm`` file to binary text by re-reading it per pass.

    ``compile`` runs three passes over the source file (labels, variables,
    translation) and then writes the accumulated output to
    ``<name>_Brayden.hack``.  Every pass sees each line with its ``//``
    comment and all spaces removed; lines that end up empty are skipped.
    """

    def __init__(self, asm_filename):
        self.output_string = ''
        self.labels_parsed = 0
        # Variables are assigned addresses starting here.
        self.next_open_memory_address = 16
        self.asm_filename = asm_filename
        self.symbol_table = SymbolTable()
        self.parser = Parser(self.symbol_table)
        self.binary_translator = BinaryTraslator(self.parser)

    def compile(self):
        """Run every pass in order, then write the output file."""
        stages = (
            self.__scan_for_labels,
            self.__scan_for_variables,
            self.__translate_to_binary,
            self.__write_to_out_file,
        )
        for stage in stages:
            stage()

    def __scan_for_labels(self):
        handler = self.__add_value_to_symbol_table_if_label
        self.__read_file_by_line(handler)

    def __scan_for_variables(self):
        handler = self.__add_value_to_symbol_table_if_variable
        self.__read_file_by_line(handler)

    def __translate_to_binary(self):
        handler = self.__translate_line_to_binary
        self.__read_file_by_line(handler)

    def __write_to_out_file(self):
        """Write the accumulated binary text next to the source file."""
        stem = self.asm_filename.partition('.asm')[0]
        target = "{}_Brayden.hack".format(stem)
        with open(target, "w") as out:
            out.write(self.output_string)

    def __read_file_by_line(self, block):
        """Call ``block(line, index)`` for each non-empty preprocessed line.

        ``index`` counts only the lines that survive preprocessing.
        """
        with open(self.asm_filename) as source:
            cleaned = (self.__preprocess_line(raw) for raw in source)
            kept = (text for text in cleaned if len(text) > 0)
            for position, text in enumerate(kept):
                block(text, position)

    def __preprocess_line(self, line):
        """Drop the ``//`` comment, surrounding whitespace and inner spaces."""
        code_part, _, _ = line.partition('//')
        return code_part.strip().replace(' ', '')

    def __add_value_to_symbol_table_if_label(self, line, cnt):
        if not self.parser.is_label(line):
            return
        # Labels take no instruction slot, so subtract the ones seen so far.
        name = self.parser.label_value(line)
        self.symbol_table.add(name, cnt - self.labels_parsed)
        self.labels_parsed = self.labels_parsed + 1

    def __add_value_to_symbol_table_if_variable(self, line, cnt):
        if not self.parser.is_variable(line):
            return
        if self.symbol_table.contains(self.parser.variable_value(line)):
            return
        self.symbol_table.add(self.parser.variable_value(line),
                              self.next_open_memory_address)
        self.__increment_next_open_memory_address()

    def __translate_line_to_binary(self, line, _):
        translated = self.binary_translator.translate(line)
        if not translated:
            return
        self.output_string = self.output_string + '{}\n'.format(translated)

    def __increment_next_open_memory_address(self):
        self.next_open_memory_address = self.next_open_memory_address + 1
else: line_address += 1 parser.seek_head() var_address = 16 while parser.has_more_commands(): parser.advance() if parser.command_type() == 'L_COMMAND': continue elif parser.command_type() == 'A_COMMAND': symbol = parser.symbol() if symbol.isdigit(): address = int(symbol) elif symbol_table.contains(symbol): address = symbol_table.get_address(symbol) else: address = var_address symbol_table.add_entry(symbol, address) var_address += 1 machine_code = address elif parser.command_type() == 'C_COMMAND': comp = Code.comp(parser.comp()) dest = Code.dest(parser.dest()) jump = Code.jump(parser.jump()) machine_code = 0b111 << 13 | comp << 6 | dest << 3 | jump machine_code_str = "{0:016b}".format(machine_code) hack_file.write(machine_code_str + '\n')
class Parser:
    """Line-oriented reader for a Hack assembly program.

    The whole file is loaded at construction time, skipping lines that start
    with ``//`` and lines made only of spaces.  ``idx`` points at the current
    line; ``symbol`` resolves symbols through the parser's own symbol table
    and rewrites ``self.assembly`` as it goes.
    """

    def __init__(self, assembly_path: str) -> None:
        """Load the instruction lines of ``assembly_path`` into memory."""
        self.coder = Code()
        self.symbol_table = SymbolTable()
        self.assembly = []
        self.idx = 0
        # `with` closes the handle even if reading raises part-way through.
        with open(assembly_path, 'r') as source:
            for line in source:
                if line[:2] == "//":
                    continue
                # A line that is only spaces followed by the newline.
                if line.replace(" ", "") == "\n":
                    continue
                self.assembly.append(line)

    def reset_idx(self):
        """Move back to the first loaded line."""
        self.idx = 0

    def hasMoreCommands(self) -> bool:
        """Return True while ``idx`` is inside the loaded program."""
        return self.idx < len(self.assembly)

    def advance(self) -> None:
        """Step to the next line; does nothing once past the end."""
        if self.hasMoreCommands():
            self.idx += 1

    def commandType(self) -> str:
        """Classify the current line as ``A``, ``C`` or ``L``.

        Returns ``None`` when the line matches none of them or when there is
        no current line.  The checks are substring tests, applied in the
        order ``@``, then ``=``/``;``, then ``(``.
        """
        if not self.hasMoreCommands():
            return None
        curr = self.assembly[self.idx]
        if "@" in curr:
            print("A", curr)
            return A
        if "=" in curr or ";" in curr:
            print("C", curr)
            return C
        if "(" in curr:
            return L
        return None

    def symbol(self) -> str:
        """Resolve the symbol on the current line and return its address.

        For an A-command the operand is registered if unknown (a decimal
        literal maps to itself, anything else gets the symbol table's next
        free index) and the stored line is rewritten to ``@<address>``.
        For a label the name is registered at the current index if unknown
        and the label line is removed from the program.  Any other line
        yields an empty string.
        """
        symbol = self.assembly[self.idx].replace(" ", "")
        symbol = symbol.split("\n")[0]
        if self.commandType() == A:
            symbol = symbol[1:]
            if not self.symbol_table.contains(symbol):
                # Only the int() conversion is guarded: a bare `except`
                # around the whole block also hid failures inside addEntry
                # and swallowed KeyboardInterrupt.
                try:
                    address = int(symbol)
                except ValueError:
                    self.symbol_table.addEntry(symbol,
                                               str(self.symbol_table.idx))
                    self.symbol_table.advance()
                else:
                    self.symbol_table.addEntry(symbol, str(address))
            self.assembly[self.idx] = (
                "@" + self.symbol_table.getAddress(symbol))
        elif self.commandType() == L:
            symbol = symbol[1:-1]
            if not self.symbol_table.contains(symbol):
                self.symbol_table.addEntry(symbol, str(self.idx))
            # Drop the label line; idx now refers to the line that followed.
            del self.assembly[self.idx]
        else:
            return ""
        return self.symbol_table.getAddress(symbol)

    def dest(self) -> str:
        """Return the encoded dest field; only valid on a C-command."""
        if self.commandType() == C:
            return self.coder.dest(self.assembly[self.idx])
        raise NotImplementedError

    def comp(self) -> str:
        """Return the encoded comp field; only valid on a C-command."""
        if self.commandType() == C:
            return self.coder.comp(self.assembly[self.idx])
        raise NotImplementedError

    def jump(self) -> str:
        """Return the encoded jump field; only valid on a C-command."""
        if self.commandType() == C:
            return self.coder.jump(self.assembly[self.idx])
        raise NotImplementedError

    def address(self) -> str:
        """Return the current line's operand as a 16-character binary string.

        The first character of the line (after removing spaces) is dropped;
        the remainder is looked up in the symbol table and otherwise parsed
        as a decimal number.
        """
        operand = self.assembly[self.idx].replace(" ", "")
        operand = operand[1:].split("\n")[0]
        if self.symbol_table.contains(operand):
            value = self.symbol_table.getAddress(operand)
        else:
            value = operand
        return format(int(value), "016b")
def main():
    """Assemble a Hack ``.asm`` file into a ``.hack`` binary and an ``.ann`` listing.

    The source path is taken from the first command-line argument (via
    ``Util.getCommandLineArg``) and resolved against the current directory.

    Pass 1 walks the program and records every label (``L_COMMAND``) with the
    ROM address of the instruction that follows it.  Pass 2 emits one 16-bit
    word per A-/C-command into the ``.hack`` file and, for every command, one
    line into the ``.ann`` file showing the original text, the command with
    symbols resolved, and the machine word split into nibbles.  The symbol
    table is appended to the ``.ann`` file as comments.
    """
    filename = os.path.join(os.getcwd(), Util.getCommandLineArg(1))
    first_parser = Parser(filename)
    second_parser = Parser(filename)
    symbol_table = SymbolTable()

    # Swap only the extension.  str.replace('asm', ...) would also rewrite any
    # "asm" in the directory part of the path, and would hand back the input
    # path unchanged (so the source gets overwritten) when the name contains
    # no "asm" at all.
    base_name = os.path.splitext(filename)[0]
    hack_filename = base_name + '.hack'
    ann_filename = base_name + '.ann'

    # ---- Pass 1: bind labels to ROM addresses -----------------------------
    rom_address = 0
    while first_parser.has_more_commands():
        first_parser.advance()
        # Strings are compared with ==; `is` only worked by accident of
        # interning and raises a SyntaxWarning on modern interpreters.
        command_type = first_parser.command_type()
        if command_type == 'A_COMMAND' or command_type == 'C_COMMAND':
            rom_address += 1
        elif command_type == 'L_COMMAND':
            symbol_table.add_entry(first_parser.symbol(), rom_address, 'LAB')

    # ---- Pass 2: emit machine code and the annotated listing --------------
    ram_address = 16  # first RAM slot handed out to a new variable
    with open(hack_filename, 'w') as hack_file, \
            open(ann_filename, 'w') as ann_file:
        while second_parser.has_more_commands():
            second_parser.advance()
            command_type = second_parser.command_type()
            machine_command = ''

            if command_type == 'A_COMMAND':
                symbol = second_parser.symbol()
                if symbol[0].isdigit():
                    binary = symbol
                elif symbol_table.contains(symbol):
                    binary = symbol_table.get_address(symbol)
                else:
                    # First sighting of a variable: give it the next RAM slot.
                    binary = ram_address
                    symbol_table.add_entry(symbol, ram_address, 'VAR')
                    ram_address += 1
                machine_command = '{0:016b}\n'.format(int(binary))
                hack_file.write(machine_command)
            elif command_type == 'C_COMMAND':
                dest = Code.dest(second_parser.dest())
                comp = Code.comp(second_parser.comp())
                jump = Code.jump(second_parser.jump())
                machine_command = '111{0}{1}{2}\n'.format(comp, dest, jump)
                hack_file.write(machine_command)

            # Labels produce no machine word, so `mc` is empty for them and
            # the nibble columns below come out blank.
            assembly = second_parser.original_command().strip()
            mc = machine_command.strip()
            annotated_machine = '{} {} {} {}'.format(
                mc[0:4], mc[4:8], mc[8:12], mc[12:16])

            if command_type == 'L_COMMAND':
                symbolless_command = symbol_table.get_address(
                    second_parser.symbol())
            elif (command_type == 'A_COMMAND'
                  and not second_parser.symbol().isdigit()):
                symbolless_command = '@{}'.format(
                    symbol_table.get_address(second_parser.symbol()))
            else:
                symbolless_command = second_parser.command

            annotated_command = '{:<39} {} {:<11} {}\n'.format(
                assembly, '//' if command_type else '', symbolless_command,
                annotated_machine)
            ann_file.write(annotated_command)

        ann_file.write('\n// Symbol Table:\n')
        # Each table value is indexed as (address, kind) here.
        for symbol, address in symbol_table.symbol_table.items():
            ann_file.write('// {}: {:<30} -> {}\n'.format(
                address[1], symbol, address[0]))
class Assembler(object):
    """Two-pass Hack assembler writing ``<source>.hack`` next to the input.

    The first pass records label addresses in the symbol table; the second
    pass emits one 16-bit word per A-/C-command, allocating variable
    addresses from 16 upward.
    """

    def __init__(self, asm_file):
        """Prepare the parser, code tables and symbol table for ``asm_file``."""
        self.asm_file = asm_file
        self.hack_file = self._hack_path(asm_file)
        self.parser = Parser(asm_file)
        self.code = Code()
        self.symbol_table = SymbolTable()

    @staticmethod
    def _hack_path(asm_file):
        """Return ``asm_file`` with its extension replaced by ``.hack``.

        Only the final extension is stripped.  Splitting on the first dot
        turned ``./prog.asm`` into ``.hack`` and broke any path with a dot in
        a directory name.
        """
        import os  # local import: the file's import block is not visible here
        return os.path.splitext(asm_file)[0] + '.hack'

    def assembly(self):
        """Translate the source program and write the result to ``hack_file``.

        Raises:
            ValueError: if the parser reports a command type that is not an
                A-, C- or L-command.
            KeyError: if a C-command field has no entry in the code tables.
        """
        print('Starting to assembly {} file...'.format(self.asm_file))
        parser = self.parser
        with open(self.hack_file, 'w') as hack_f:
            ##################
            ### First pass ###
            ##################
            line_number = 0
            while parser.advance():
                command_type = parser.command_type()
                if command_type in (parser.c_command, parser.a_command):
                    line_number += 1
                elif command_type == parser.l_command:
                    # A label names the address of the next real instruction.
                    self.symbol_table.add_entry(parser.symbol(), line_number)
                else:
                    raise ValueError('Ups!')

            ###################
            ### Second pass ###
            ###################
            # NOTE(review): this loop reuses the same parser without an
            # explicit rewind; it relies on advance() starting over once it
            # has returned a falsy value -- confirm against Parser.
            next_var_address = 16
            while parser.advance():
                command_type = parser.command_type()
                if command_type == parser.c_command:
                    dest_bin = self.code.dest_mnemonic[parser.dest()]
                    comp_bin = self.code.comp_mnemonic[parser.comp()]
                    jump_bin = self.code.jump_mnemonic[parser.jump()]
                    word_16 = ''.join(
                        ['111', comp_bin, dest_bin, jump_bin, '\n'])
                elif command_type == parser.a_command:
                    symbol = parser.symbol()
                    if symbol.isdigit():
                        address_bin = format(int(symbol), 'b').zfill(15)
                    else:
                        # Unknown symbols are variables: allocate on first use.
                        if not self.symbol_table.contains(symbol):
                            self.symbol_table.add_entry(
                                symbol, next_var_address)
                            next_var_address += 1
                        address_int = self.symbol_table.get_address(symbol)
                        address_bin = format(address_int, 'b').zfill(15)
                    word_16 = ''.join(['0', address_bin, '\n'])
                elif command_type == parser.l_command:
                    # Labels emit no machine code.
                    continue
                else:
                    raise ValueError('Ups!')
                hack_f.write(word_16)
        print('Successfully finished the assembly process :)')
class CompilationEngine(object): # the destination file for writing destination_file = None # the tokenizer for the input file tokenizer = None # symbol table symbol_table = None # vm writer vm_writer = None # the class name class_name = "" # indicies for if and while loops # start at -1 because we increment before use while_index = -1 if_index = -1 # the constructor for compiling a single class # the next method to be called after construction must be compile_class # source_filename must be a single file, not a directory def __init__(self, source_filename): # destination filename # if the original extension was .jack, then make the extension .vm # if the original extension was not .jack, then append .vm if source_filename.lower().endswith(".jack"): destination_filename = source_filename[:-5] + ".vm" else: destination_filename = source_filename + ".vm" # open the destination filename for writing self.destination_file = open(destination_filename, 'w') # create a tokenizer for the input file self.tokenizer = JackTokenizer(source_filename) # create the symbol table self.symbol_table = SymbolTable() # create the vm writer self.vm_writer = VMWriter(self.destination_file) # compiles a complete class and closes the output file def compile_class(self): # class keyword tt, t = self._token_next(True, "KEYWORD", "class") # name of class tt, t = self._token_next(True, "IDENTIFIER") self.class_name = t # open brace tt, t = self._token_next(True, "SYMBOL", "{") # one or more variable declarations self.tokenizer.advance() while True: tt, t = self._token_next(False) if tt == "KEYWORD" and t in ["field", "static"]: self.compile_class_var_dec() else: # stop trying to process variable declarations break # one or more subroutine declarations while True: tt, t = self._token_next(False) if tt == "KEYWORD" and t in ["constructor", "function", "method"]: self.compile_subroutine() else: # stop trying to process functions break # close brace # do not advance because we already advanced upon 
exiting the last loop tt, t = self._token_next(False, "SYMBOL", "}") # done with compilation; close the output file self.destination_file.close() # compiles a static declaration or field declaration def compile_class_var_dec(self): # compile the variable declaration # False means this is a class (not a subroutine) self.compile_var_dec(False) # compiles a complete method, function, or constructor def compile_subroutine(self): # start of subroutine self.symbol_table.start_subroutine() # constructor, function, or method keyword tt, type = self._token_next(False, "KEYWORD") # type of the return value # can be either keyword (void) or an identifier (any type) tt, t = self._token_next(True) # name of the method/function/constructor tt, name = self._token_next(True) name = self.class_name + "." + name # if the type is a method, "define" this as an argument, so the other # argument indexes work correctly if type == "method": self.symbol_table.define("this", self.class_name, SymbolTable.ARG) # opening parenthesis tt, t = self._token_next(True, "SYMBOL", "(") # arguments self.tokenizer.advance() self.compile_parameter_list() # closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # opening brace tt, t = self._token_next(True, "SYMBOL", "{") # variable declarations self.tokenizer.advance() while True: tt, t = self._token_next(False) if tt == "KEYWORD" and t == "var": self.compile_var_dec() else: # stop trying to process variable declarations break # write the function num_locals = self.symbol_table.var_count(self.symbol_table.VAR) self.vm_writer.write_function(name, num_locals) # write any special code at the top of the function if type == "constructor": # code to allocate memory and set "this" size = self.symbol_table.var_count(self.symbol_table.FIELD) self.vm_writer.write_push(self.vm_writer.CONST, size) self.vm_writer.write_call("Memory.alloc", 1) self.vm_writer.write_pop(self.vm_writer.POINTER, 0) elif type == "function": # nothing special pass elif type == 
"method": # put argument 0 into pointer 0 (this) self.vm_writer.write_push(self.vm_writer.ARG, 0) self.vm_writer.write_pop(self.vm_writer.POINTER, 0) else: print "WARNING: Expected constructor, function, or name; got", type # statements self.compile_statements() # closing brace tt, t = self._token_next(False, "SYMBOL", "}") self.tokenizer.advance() # compiles a (possibly empty) parameter list, not including the enclosing # parentheses def compile_parameter_list(self): # check for empty list tt, t = self._token_next(False) if tt == "SYMBOL" and t == ")": # the parameter list was empty; do not process any more pass else: # there are things in the parameter list while True: # keyword (variable type) tt, type = self._token_next(False) # identifier (variable name) tt, name = self._token_next(True) # the kind is always an arg, since these are all parameters to the # function kind = SymbolTable.ARG # define the variable in the symbol table self.symbol_table.define(name, type, kind) # possible comma tt, t = self._token_next(True) if tt != "SYMBOL" or t != ",": # not a comma; stop processing parameters break self.tokenizer.advance() # compiles a var declaration # if subroutine is true, only the var keyword can be used # if subroutine is false, only the static and field keywords can be used def compile_var_dec(self, subroutine=True): # the keyword to start the declaration tt, kind = self._token_next(False, "KEYWORD") # check for required types if subroutine: if kind == "var": kind = SymbolTable.VAR else: print "WARNING: expecting var, but received %s" % (str(kind)) else: if kind == "static": kind = SymbolTable.STATIC elif kind == "field": kind = SymbolTable.FIELD else: print "WARNING: expecting static or field, but received %s" % (str(kind)) # type of the declaration # could be an identifier or a keyword (int, etc) tt, type = self._token_next(True) # name of the declaration tt, name = self._token_next(True, "IDENTIFIER") # define the variable in the symbol table 
self.symbol_table.define(name, type, kind) # can support more than one identifier name, to declare more than one # variable, separated by commas; process the 2nd-infinite variables self.tokenizer.advance() while True: tt, t = self._token_next(False) if tt == "SYMBOL" and t == ",": # another variable name follows tt, name = self._token_next(True, "IDENTIFIER") # define the variable in the symbol table self.symbol_table.define(name, type, kind) self.tokenizer.advance() else: # no more variable names break # should be on the semicolon at the end of the line tt, t = self._token_next(False, "SYMBOL", ";") self.tokenizer.advance() # compiles a sequence of statements, not including the enclosing {} def compile_statements(self): while True: tt, t = self._token_next(False) if tt == "KEYWORD" and t in ["do", "let", "while", "return", "if"]: # call compile_t, where t is the type of compilation we want token = getattr(self, "compile_" + t)() else: # not a statement; stop processing statements break # compiles a do statement def compile_do(self): # do keyword tt, t = self._token_next(False, "KEYWORD", "do") # subroutine call self.tokenizer.advance() self.compile_subroutine_call() # do statements do not have a return value, so eliminate the return # off of the stack self.vm_writer.write_pop(self.vm_writer.TEMP, 0) # semicolon tt, t = self._token_next(False, "SYMBOL", ";") self.tokenizer.advance() # compiles a let statement def compile_let(self): # let keyword tt, t = self._token_next(False, "KEYWORD", "let") # variable name tt, name = self._token_next(True, "IDENTIFIER") # possible brackets for array tt, t = self._token_next(True) if tt == "SYMBOL" and t == "[": # array - write operation array = True # compile the offset expression self.tokenizer.advance() self.compile_expression() # write the base address onto the stack segment, index = self._resolve_symbol(name) self.vm_writer.write_push(segment, index) # add base and offset self.vm_writer.write_arithmetic("add") # we cannot 
yet put the result into pointer 1, since the read # operation (which hasn't been parsed/computed yet) may use pointer 1 # to read from an arrya value # closing bracket tt, t = self._token_next(False, "SYMBOL", "]") # advance to the next token, since we are expected to be on the = for # the next line self.tokenizer.advance() else: array = False # equals sign tt, t = self._token_next(False, "SYMBOL", "=") # expression self.tokenizer.advance() self.compile_expression() if array: # our stack now looks like this: # TOP OF STACK # computed result to store # address in which value should be stored # ... previous stuff ... # pop the computed value to temp 0 self.vm_writer.write_pop(self.vm_writer.TEMP, 0) # pop the array address to pointer 1 (that) self.vm_writer.write_pop(self.vm_writer.POINTER, 1) # put the computed value back onto the stack self.vm_writer.write_push(self.vm_writer.TEMP, 0) # pop to the variable name or the array reference self.vm_writer.write_pop(self.vm_writer.THAT, 0) else: # not an array - pop the expression to the variable segment, index = self._resolve_symbol(name) self.vm_writer.write_pop(segment, index) # semicolon tt, t = self._token_next(False, "SYMBOL", ";") self.tokenizer.advance() # compiles a while statement def compile_while(self): # labels for this while loop self.while_index += 1 while_start = "WHILE_START_%d" % (self.while_index) while_end = "WHILE_END_%d" % (self.while_index) # while keyword tt, t = self._token_next(False, "KEYWORD", "while") # opening parenthesis tt, t = self._token_next(True, "SYMBOL", "(") # label for the start of the while statement self.vm_writer.write_label(while_start) # the expression that is the condition of the while statement self.tokenizer.advance() self.compile_expression() # the closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # the result of the evaluation is now on the stack # if false, then goto to the end of the loop # to do this, negate and then call if-goto 
self.vm_writer.write_arithmetic("not") self.vm_writer.write_if(while_end) # the opening brace tt, t = self._token_next(True, "SYMBOL", "{") # the statments that is the body of the while loop self.tokenizer.advance() self.compile_statements() # the closing brace tt, t = self._token_next(False, "SYMBOL", "}") # after the last statement of the while loop # need to jump back up to the top of the loop to evaluate again self.vm_writer.write_goto(while_start) # label at the end of the loop self.vm_writer.write_label(while_end) self.tokenizer.advance() # compiles a return statement def compile_return(self): # return keyword tt, t = self._token_next(False, "KEYWORD", "return") # possible expression to return tt, t = self._token_next(True) if tt != "SYMBOL" and t != ";": self.compile_expression() else: # no return expression; return 0 self.vm_writer.write_push(self.vm_writer.CONST, 0) # ending semicolon tt, t = self._token_next(False, "SYMBOL", ";") self.vm_writer.write_return() self.tokenizer.advance() # compiles a if statement, including a possible trailing else clause def compile_if(self): # it is more efficient in an if-else case to have the else portion first # in the code when testing, but we use the less-efficient but # easier-to-write true-false pattern here # labels for this if statement self.if_index += 1 if_false = "IF_FALSE_%d" % (self.if_index) if_end = "IF_END_%d" % (self.if_index) # if keyword tt, t = self._token_next(False, "KEYWORD", "if") # opening parenthesis tt, t = self._token_next(True, "SYMBOL", "(") # expression of if statement self.tokenizer.advance() self.compile_expression() # closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # the result of the evaluation is now on the stack # if false, then goto the false label # if true, fall through to executing code # if there is no else, then false and end are the same, but having two # labels does not increase code size self.vm_writer.write_arithmetic("not") self.vm_writer.write_if(if_false) 
# opening brace tt, t = self._token_next(True, "SYMBOL", "{") # statements for true portion self.tokenizer.advance() self.compile_statements() # closing brace tt, t = self._token_next(False, "SYMBOL", "}") tt, t = self._token_next(True) if tt == "KEYWORD" and t == "else": # else statement exists # goto the end of the if statement at the end of the true portion self.vm_writer.write_goto(if_end) # label for the start of the false portion self.vm_writer.write_label(if_false) # opening brace tt, t = self._token_next(True, "SYMBOL", "{") # statements self.tokenizer.advance() self.compile_statements() # closing brace tt, t = self._token_next(False, "SYMBOL", "}") # end label self.vm_writer.write_label(if_end) # advance tokenizer only if we are in the else, since otherwise the # token was advanced by the else check self.tokenizer.advance() else: # no else portion; only put in a label for false, since end is not # used self.vm_writer.write_label(if_false) # compiles an expression (one or more terms connected by operators) def compile_expression(self): # the first term self.compile_term() # finish any number of operators followed by terms while True: tt, t = self._token_next(False) if tt == "SYMBOL" and t in "+-*/&|<>=": # found an operator # postfix order - add the next term and then do the operator # the next term self.tokenizer.advance() self.compile_term() # the operator if t == "+": self.vm_writer.write_arithmetic("add") if t == "-": self.vm_writer.write_arithmetic("sub") if t == "=": self.vm_writer.write_arithmetic("eq") if t == ">": self.vm_writer.write_arithmetic("gt") if t == "<": self.vm_writer.write_arithmetic("lt") if t == "&": self.vm_writer.write_arithmetic("and") if t == "|": self.vm_writer.write_arithmetic("or") if t == "*": self.vm_writer.write_call("Math.multiply", 2) if t == "/": self.vm_writer.write_call("Math.divide", 2) else: # no term found; done parsing the expression break # compiles a term # this routine is faced with a slight difficulty when 
trying to decide # between some of the alternative parsing rules. specifically, if the # current token is an identifier, the routine must distinguish between a # variable, an array entry, and a subroutine call. a single lookahead token, # which may be one of [, (, or ., suffices to distinguish between the three # possibilities. any other token is not part of this term and should not # be advanced over. def compile_term(self): # a term: integer_constant | string_constant | keyword_constant | # varname | varname[expression] | subroutine_call | (expression) | # unary_op term tt, t = self._token_next(False) if tt == "INT_CONST": self.vm_writer.write_push(self.vm_writer.CONST, t) # advance for the next statement self.tokenizer.advance() elif tt == "STRING_CONST": # after this portion is run, a pointer to a string should be on the # stack # we create a new string of a certain size and then append characters # one by one; each append operation returns the pointer to the same # string # create the string # string is a len, data tuple; not null-terminated size = len(t) self.vm_writer.write_push(self.vm_writer.CONST, size) self.vm_writer.write_call("String.new", 1) # append each character for char in t: self.vm_writer.write_push(self.vm_writer.CONST, ord(char)) self.vm_writer.write_call("String.appendChar", 2) # advance for the next statement self.tokenizer.advance() elif tt == "KEYWORD": if t == "true": # true is -1, which is 0 negated self.vm_writer.write_push(self.vm_writer.CONST, 0) self.vm_writer.write_arithmetic("not") elif t == "false" or t == "null": self.vm_writer.write_push(self.vm_writer.CONST, 0) elif t == "this": self.vm_writer.write_push(self.vm_writer.POINTER, 0) # advance for the next statement self.tokenizer.advance() elif tt == "SYMBOL" and t == "(": # ( expression ) # parse the expression self.tokenizer.advance() self.compile_expression() # closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # advance for the next statement 
self.tokenizer.advance() elif tt == "SYMBOL" and t in "-~": # unary_op term # postfix order - add the next term and then do the operator # parse the rest of the term self.tokenizer.advance() self.compile_term() # write the unary operation if t == "-": self.vm_writer.write_arithmetic("neg") elif t == "~": self.vm_writer.write_arithmetic("not") elif tt == "IDENTIFIER": # varname, varname[expression], subroutine_call # do not write the identifer yet # get the next bit of the expression # if it is a [, then array; if it is a ( or ., then subroutine call # if none of above, then pass over tt2, t2 = self._token_next(True) if tt2 == "SYMBOL" and t2 in "(.": # subroutine call # back up and then compile the subroutine call self.tokenizer.retreat() self.compile_subroutine_call() elif tt2 == "SYMBOL" and t2 == "[": # array - read operation # write the base address onto the stack segment, index = self._resolve_symbol(t) self.vm_writer.write_push(segment, index) # compile the offset expression self.tokenizer.advance() self.compile_expression() # add base and offset self.vm_writer.write_arithmetic("add") # put the resulting address into pointer 1 (that) self.vm_writer.write_pop(self.vm_writer.POINTER, 1) # read from that 0 onto the stack self.vm_writer.write_push(self.vm_writer.THAT, 0) # closing bracket tt, t = self._token_next(False, "SYMBOL", "]") # advance for the next statement self.tokenizer.advance() else: # none of above - just a single identifier segment, index = self._resolve_symbol(t) self.vm_writer.write_push(segment, index) else: # unknown print "WARNING: Unknown term expression object:", tt, t # compiles a (possible empty) comma-separated list of expressions def compile_expression_list(self): num_args = 0 # check for empty list tt, t = self._token_next(False) if tt == "SYMBOL" and t == ")": # the parameter list was empty; do not process any more pass else: # there are things in the parameter list while True: # expression to pass self.compile_expression() num_args 
+= 1 # possible comma tt, t = self._token_next(False) if tt == "SYMBOL" and t == ",": self.tokenizer.advance() else: # not a comma; stop processing parameters break return num_args # compiles a subroutine call # two cases: # - subroutineName(expressionList) # - (class|var).subroutineName(expressionList) def compile_subroutine_call(self): # first part of name tt, name1 = self._token_next(False, "IDENTIFIER") # a dot and another name may exist, or it could be a parenthesis name2 = None tt, t = self._token_next(True) if tt == "SYMBOL" and t == ".": # the name after the dot tt, name2 = self._token_next(True, "IDENTIFIER") # advance so that we are on the parenthesis self.tokenizer.advance() # determine if this is a method call # three possibilities # - class.func() - function call # - var.func() - method call # - func() - method call on current object if self.symbol_table.contains(name1): method_call = True local_call = False elif name2 == None: method_call = True local_call = True else: method_call = False # if a method call, push variable name1 # this a method call if the symbol table contains name1 and name2 exists # OR name1 is a method in the current object if method_call and local_call: # push the current object onto the stack as a hidden argument self.vm_writer.write_push(self.vm_writer.POINTER, 0) elif method_call and not local_call: # push the variable onto the stack as a hidden argument segment, index = self._resolve_symbol(name1) self.vm_writer.write_push(segment, index) # opening parenthesis tt, t = self._token_next(False, "SYMBOL", "(") # expression list self.tokenizer.advance() num_args = self.compile_expression_list() # closing parenthesis tt, t = self._token_next(False, "SYMBOL", ")") # write the call if method_call and local_call: # methd + <blank> # get the name of the vm function to call classname = self.class_name vm_function_name = classname + "." 
+ name1 # increase arguments by 1, since there is the hidden "this" num_args += 1 # make the call self.vm_writer.write_call(vm_function_name, num_args) elif method_call and not local_call: # variable name + method # get the name of the vm function to call classname = self.symbol_table.get(name1)[1] vm_function_name = classname + "." + name2 # increase arguments by 1, since there is the hidden "this" num_args += 1 # make the call self.vm_writer.write_call(vm_function_name, num_args) else: # get the name of the vm function to call vm_function_name = name1 + "." + name2 # make the call self.vm_writer.write_call(vm_function_name, num_args) self.tokenizer.advance() # returns the token_type and token of the next token after advancing the # tokenizer before reading if advance is True def _token_next(self, advance=False, expected_type=None, expected_value=None): # advance the tokenizer, if requested if advance: self.tokenizer.advance() # get the token type and the token itself token_type = self.tokenizer.token_type() token = str(getattr(self.tokenizer, token_type.lower())()) if expected_type and token_type != expected_type: print "WARNING: Type", token_type, "found; expected", expected_type import traceback, sys traceback.print_stack() sys.exit(1) if expected_value and token != expected_value: print "WARNING: Value", token, "found; expected", expected_value import traceback, sys traceback.print_stack() sys.exit(1) return token_type, token # convets a symbol table type into a segment type def _type_to_segment(self, type): if type == self.symbol_table.STATIC: return self.vm_writer.STATIC elif type == self.symbol_table.FIELD: return self.vm_writer.THIS elif type == self.symbol_table.ARG: return self.vm_writer.ARG elif type == self.symbol_table.VAR: return self.vm_writer.LOCAL else: print "ERROR: Bad type %s" % (str(type)) # resolves the symbol from the symbol table # the segment and index is returned as a 2-tuple def _resolve_symbol(self, name): kind, type, index = 
self.symbol_table.get(name) return self._type_to_segment(kind), index
# Single walk over every command: record the address of each label and
# collect the non-numeric A-command operands.
l_command_address = 0  # number of non-label commands seen so far
# a list keeps the symbols in first-seen order
# NOTE(review): presumably that order matters to later code - confirm
a_command_list = []

while asm.hasMoreCommands():
    asm.advance()
    current_command = asm.currentCommand
    current_command_type = asm.commandType()

    if current_command_type == 'L_COMMAND':
        # a label: remember where it points, but only the first time the
        # symbol is seen; labels do not advance the address
        symbol = asm.symbol()
        if not symbol_table.contains(symbol):
            symbol_table.addEntry(symbol, l_command_address)
    else:
        if current_command_type == 'A_COMMAND':
            symbol = asm.symbol()
            # an all-digit operand is a plain number, not a symbol
            if not symbol.isdigit() and symbol not in a_command_list:
                a_command_list.append(symbol)

        # every non-label command advances the address
        l_command_address += 1