def makeTable(self, _prev):
    """Build a new symbol table nested under ``_prev``.

    Args:
        _prev: Table stored as the new table's ``parent`` attribute.

    Returns:
        The freshly constructed ``ST`` instance.
    """
    child_table = ST()
    # Link the new table to the scope it is nested in.
    child_table.parent = _prev
    return child_table
def main():
    """Translate the assembly file named by ``sys.argv[1]`` into a ``.hack`` file.

    The output path is the input path with its extension replaced by
    ``.hack``.  Each command line produced by ``Parser.commandLines()`` is
    classified with ``ParserComd.commandType()`` and written as one output
    line.
    """
    # Local import so this function does not depend on the file's import block.
    import os

    source_path = sys.argv[1]
    # splitext only strips the final extension, so "./prog.asm" or
    # "dir.v2/prog.asm" keep their directory part intact.
    output_path = os.path.splitext(source_path)[0] + ".hack"

    lines = Parser(source_path).commandLines()

    # "w" (not "a"): re-running the assembler must replace the previous output
    # rather than append to it.  The context manager closes the file even when
    # a line fails to translate.
    with open(output_path, "w") as outputfile:
        for line in lines:
            # Parse the line once instead of rebuilding ParserComd per query.
            command = ParserComd(line)
            kind = command.commandType()

            if kind == 'A_command':
                symbol_line = command.symbol()
                # NOTE(review): a fresh SymbolTable per A-command is kept from
                # the original; confirm whether one shared table was intended.
                symbol_a = SymbolTable()
                symbol_a.addEntry(symbol_line)
                outputfile.write(symbol_a.GetAddress(symbol_line))
                outputfile.write('\n')
            elif kind in ('C_command_a', 'C_command_b'):
                cbinary = Code(
                    command.dest(), command.comp(), command.jump()
                ).cinstruction()
                outputfile.write(cbinary)
                outputfile.write('\n')
            elif kind == 'L_command':
                # Placeholder line emitted for label declarations.
                outputfile.write('This line is going to delete\n')
class symboltabletests(unittest.TestCase):
    """Unit tests for ``SymbolTable`` membership queries."""

    def setUp(self):
        """Give every test its own ``SymbolTable`` instance."""
        self.st = SymbolTable()

    def testContains(self):
        """``contains`` is true for an added symbol and false otherwise."""
        table = self.st
        table.addEntry("loop", 100)

        known = table.contains("loop")
        self.assertTrue(known)

        unknown = table.contains("bobby")
        self.assertFalse(unknown)
def first_pass(path):
    """Collect label declarations from the source at ``path``.

    Walks every command of a ``Parser`` built from ``path``.  A command of
    type ``CommandType.L`` is recorded in the symbol table under the current
    counter value; any other command increments the counter.

    Returns:
        The populated ``SymbolTable``.
    """
    parser = Parser(path)
    labels = SymbolTable()
    instruction_index = 0

    while parser.hasMoreCommands():
        if parser.commandType() == CommandType.L:
            # Labels do not advance the counter; they name the next command.
            labels.add_entry(parser.symbol(), instruction_index)
        else:
            instruction_index += 1
        parser.advance()

    return labels
def visit_Fundef(self, node):
    """Register a function definition and check its arguments and body.

    The function is recorded in the enclosing functions table, then new
    ``Functions``/``Variables`` tables (children of the node's own) are
    attached to the argument list and to the body before each is visited.

    Args:
        node: Function-definition node exposing ``id``, ``type``, ``lineno``,
            ``argList``, ``compoundInstr``, ``Functions`` and ``Variables``.
    """
    node.Functions.putNewFun(node.id, node.type)

    Functions = FunctionsTable(node.Functions, "Functions")
    Variables = SymbolTable(node.Variables, "Variables")

    node.argList.Functions = Functions
    node.argList.Variables = Variables
    listOfArguments = node.argList.accept(self)

    for element in listOfArguments:
        if element is None:
            continue
        # Each element is indexed as (name, type) below.
        node.Functions.put(node.id, element[1])
        if Variables.put(element[0], element[1]) == -1:
            # The original read ``element.name`` here, although the element is
            # used as a sequence everywhere else; use the same index that was
            # passed to ``Variables.put`` as the name.
            self.errors.append(
                "In line " + str(node.lineno) + ": variable "
                + str(element[0]) + " was initialized"
            )

    node.compoundInstr.Functions = Functions
    node.compoundInstr.Variables = Variables
    node.compoundInstr.accept(self)
def setUp(self):
    """Create an ``Assembler`` wired to a fresh parser and symbol table."""
    assembler = Assembler()
    parser = Parser()
    table = SymbolTable()

    assembler.setSymbolTable(table)
    assembler.setParser(parser)

    # Expose the collaborators the tests inspect.
    self.assembler = assembler
    self.symbolTable = table
def __init__(self):
    """Initialise the root table, the active-table stack and the function table."""
    root_table = ST()
    # Root symbol table.
    self.root = root_table
    # Stack of symbol tables for activation records; starts with the root.
    self.activeSTs = [root_table]
    # Table of functions.
    self.ftable = dict()
def visit_FunDef(self, node):
    """Declare a function in the enclosing scope and visit its body.

    A new scope named after the function is opened, the argument nodes are
    visited inside it, and a ``FunSymbol`` is stored in the parent scope.
    The previous scope is restored after the body has been visited.

    Args:
        node: Function-definition node exposing ``id``, ``typeOrId``,
            ``argList.list`` and ``compoundInstr``.

    Returns:
        The function's name (``node.id.value``).
    """
    function_name = node.id.value
    self.symbolTable = SymbolTable(self.symbolTable, function_name)

    # A list comprehension instead of ``map``: under Python 3 ``map`` is lazy,
    # so the argument nodes would not be visited while this function's scope
    # is the current one (or at all, if the symbol never iterates them).
    arguments = [argument.accept(self) for argument in node.argList.list]

    self.symbolTable.getParentScope().put(
        function_name,
        FunSymbol(node.typeOrId, function_name, arguments),
    )

    node.compoundInstr.accept(self)
    self.symbolTable = self.symbolTable.getParentScope()
    return function_name
def __init__(self, inputFile, outputFile):
    """Build the engine's collaborators and compile the class immediately.

    Args:
        inputFile: Passed to ``JackTokenizer``.
        outputFile: Passed to ``VMWriter``.
    """
    self.tokenizer = JackTokenizer(inputFile)
    self.vmWriter = VMWriter(outputFile)
    self.symbolTable = SymbolTable()
    self.classname = ""
    # The counters must exist before compilation starts: the original assigned
    # them after ``CompileClass()``, so any use during compilation either
    # failed with AttributeError or was reset to zero afterwards.
    self.whilecounter = 0
    self.ifcounter = 0
    self.CompileClass()
def visit_ClassDef(self, node):
    """Open a scope for a class, build its symbol and register it at the root.

    The class scope is nested under the current scope, or under the parent
    class's saved table when ``node.parentId`` is set.  After the class
    content has been visited, the current scope is rewound to the outermost
    table and the ``ClassSymbol`` is stored there.

    Args:
        node: Class-definition node exposing ``id``, ``parentId``,
            ``accessmodificator`` and ``classcontent``.

    Raises:
        KeyError: If ``node.parentId`` names a class that has no entry in
            ``self.classTables``.
    """
    class_name = node.id.value
    parent = node.parentId

    # Identity tests against None (rather than ==) so a custom __eq__ on AST
    # nodes or tables cannot affect the checks.
    if parent is None:
        enclosing = self.symbolTable
    else:
        enclosing = self.classTables[parent.value]
    self.symbolTable = SymbolTable(enclosing, class_name)

    # Parent reference and class content are visited inside the new scope,
    # in this order.
    parent_symbol = None if parent is None else parent.accept(self)
    content = node.classcontent.accept(self)
    classSymbol = ClassSymbol(
        node.accessmodificator, class_name, parent_symbol, content
    )

    self.classTables[class_name] = self.symbolTable

    # Rewind to the outermost scope; classes are registered there.
    while self.symbolTable.parent is not None:
        self.symbolTable = self.symbolTable.getParentScope()
    self.symbolTable.put(class_name, classSymbol)
def visit_Program(self, node):
    """Visit a whole program inside a fresh top-level scope named ``'main'``.

    Declarations, function definitions and instructions are visited in that
    order.  Any ordinary exception raised while visiting is reported through
    ``self.error`` instead of propagating.

    Args:
        node: Program node exposing ``declarations``, ``fundefs`` and
            ``instructions``.
    """
    try:
        self.symbolTable = SymbolTable(None, 'main')
        node.declarations.accept(self)
        node.fundefs.accept(self)
        node.instructions.accept(self)
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still terminate the run.
        self.error("could not continue parsing, correct errors first", 0)
def __init__(self, tokenizer, outputFile, vmFile):
    """Store the collaborators and create the symbol table and VM writer.

    Args:
        tokenizer: Stored as ``self.tokenizer``.
        outputFile: Stored as ``self.outputFile`` and echoed to stdout.
        vmFile: Passed to ``VMWriter``.
    """
    # Imported here, as in the original, rather than at module level.
    from SymbolTable import SymbolTable
    from VMWriter import VMWriter

    self.tokenizer, self.outputFile = tokenizer, outputFile

    symbols = SymbolTable()
    writer = VMWriter(vmFile)
    self.symbolTable = symbols
    self.vmWriter = writer

    self.labelNum = 0
    print(outputFile)
def __init__(self, tokens, vmwriter):
    """Validate the token list and set up the parser state.

    Args:
        tokens: Indexable collection whose first element must expose
            ``value`` and ``type`` attributes.
        vmwriter: Stored as ``self.vmwriter``.

    Raises:
        SystemExit: If ``tokens[0]`` cannot be read or lacks ``value`` or
            ``type``.
    """
    try:
        # Probe the first token only; this is a shape check, not a full scan.
        first_token = tokens[0]
        first_token.value
        first_token.type
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt is not converted into this message.
        sys.exit("Parser did not take in a list of tokens!")

    self.tokens = tokens
    self.vmwriter = vmwriter
    self.symTable = SymbolTable()
def visit_CompoundInstruction(self, node):
    """Visit a compound instruction inside its own ``"compound"`` scope.

    Declarations are visited first, then instructions; afterwards the
    current scope is reset to the new scope's ``parent``.
    """
    inner_scope = SymbolTable(self.scope, "compound")
    self.scope = inner_scope

    for decl in node.decls:
        decl.accept(self)

    for instr in node.instrs:
        instr.accept(self)

    # Leave the compound scope once its contents have been processed.
    self.scope = self.scope.parent
def __init__(self, tokenizer, out_file_name):
    """Create an engine reading from ``tokenizer`` and writing to ``out_file_name``.

    Args:
        tokenizer: Stored as ``self._tokenizer``.
        out_file_name: Passed to ``VMWriter``.
    """
    self._tokenizer = tokenizer

    writer = VMWriter(out_file_name)
    symbols = SymbolTable()
    self._vm_writer = writer
    self._symbol_table = symbols

    # Names are unknown until compilation starts.
    self._class_name = None
    self._subroutine_name = None
    self._counter = 0
def __init__(self, input_file, output_file):
    """Create the tokenizer, symbol table and writer, and reset compile state.

    Args:
        input_file: Passed to ``JackTokenizer``.
        output_file: Passed to ``VMWriter``.
    """
    tokenizer = JackTokenizer(input_file)
    symbols = SymbolTable()
    vm_writer = VMWriter(output_file)
    self.jack_tokenizer = tokenizer
    self.symbol_table = symbols
    self.writer = vm_writer

    # Names and return type of the unit currently being compiled.
    self.class_name = ""
    self.subroutine_name = ""
    self.return_type = ""

    # Per-statement-kind label counters.
    self.label_counter_if = 0
    self.label_counter_while = 0

    self.num_args_called_function = 0
    self.is_unary = False

    # Operator symbol -> VM command text.
    self.dic_arithmetic = dict([
        ("+", "add"),
        ("-", "sub"),
        ("*", "call Math.multiply 2"),
        ("/", "call Math.divide 2"),
        ("&", "and"),
        ("|", "or"),
        ("<", "lt"),
        (">", "gt"),
        ("=", "eq"),
    ])
def __init__(self, input_file_path):
    """Open the output file and create the parser and symbol table.

    The output path is ``input_file_path`` with a trailing ``.asm``
    extension replaced by ``.hack``; when the input has any other (or no)
    extension, ``.hack`` is appended instead.

    Args:
        input_file_path: Path of the assembly source, passed to ``Parser``.
    """
    # Local import so this method does not depend on the file's import block.
    import os

    # ``str.replace('.asm', '.hack')`` rewrote every occurrence in the path
    # and, for a path without '.asm', produced the input path itself, which
    # was then truncated by opening it with 'w'.
    root, extension = os.path.splitext(input_file_path)
    if extension == '.asm':
        output_path = root + '.hack'
    else:
        output_path = input_file_path + '.hack'
    self.output_file = open(output_path, 'w')

    # The ROM address is the address of the current instruction written in
    # the .hack file.  The first instruction is 0, the second is 1, etc.
    # A label is not an instruction.
    self.current_rom_address = ROM_BASE_ADRESS

    self.parser = Parser(input_file_path)
    self.symbol_table = SymbolTable()

    # The RAM address of the next free memory cell for a new variable.
    self.next_free_var_address = VARIABLES_BASE_ADDRESS
def compileClass(self):
    """Compile a complete class.

    Grammar: 'class' className '{' classVarDec* subroutineDec* '}'
    """
    self.classTable = SymbolTable()

    # class
    self.getNext()
    # className
    self.className = self.tokenizer.getToken()
    self.getNext()
    # {
    self.getNext()

    # classVarDec*: the current token is re-read after each declaration.
    while self.tokenizer.getToken() in ("static", "field"):
        self.compileDec()

    # subroutineDec*
    while self.tokenizer.getToken() in ("constructor", "function", "method"):
        self.compileSubroutine()

    # }
    self.getNext()
def visit_Fundef(self, node):
    """Declare a function, then check its arguments and body in a new scope.

    A redefinition in the current scope is reported on stdout and the body
    is still visited.  A missing return statement (``self.returned`` still
    false after the body) is reported as well.
    """
    earlier = self.scope.getDirect(node.name)
    if earlier is not None:
        print("Symbol {0} already defined at {1}:{2}. First defined at {3}:{4}".format(
            node.name, node.pos[0], node.pos[1], earlier.pos[0], earlier.pos[1]))
    else:
        self.scope.put(node.name, node)

    # Enter the function's own scope.
    self.scope = SymbolTable(self.scope, node.name)

    # Expose the declared return type while the body is visited.
    self.return_type = node.return_type
    self.returned = False

    for arg in node.arguments:
        arg.accept(self)
    node.body.accept(self)

    if not self.returned:
        print("No return statement found in function {0} defined at {1}:{2}".format(node.name, node.pos[0], node.pos[1]))

    # Reset the return-tracking state.
    self.return_type = None
    self.returned = False

    # Leave the function scope.
    self.scope = self.scope.parent
def main():
    """Assemble the ``.asm`` file named on the command line into a ``.hack`` file.

    Pass 1 records every ``L_COMMAND`` label with the ROM address of the
    instruction that follows it.  Pass 2 translates ``C_COMMAND`` and
    ``A_COMMAND`` instructions to binary; symbols not yet known are
    allocated consecutive addresses starting at 16.  Output is written only
    when the whole file translated without error.
    """
    # Make sure the program was invoked correctly.
    if len(sys.argv) != 2:
        print("usage: python assempler.py <some .asm file>")
        return

    path = sys.argv[1]

    # splitext looks only at the final extension, so "./prog.asm" is accepted
    # and a path without any dot no longer raises IndexError.  The original
    # printed the error but carried on regardless.
    if os.path.splitext(path)[1] != '.asm':
        print("Error: you did not supply an asm file")
        return

    code = Code()
    symbolTable = SymbolTable()
    symbolEntry = 16  # next free address for a newly seen symbol
    output = []       # binary output lines

    # The source stays open across both passes (the parser is reset between
    # them) and is closed on every exit path, including the error returns.
    with open(path, 'r') as source:
        parser = Parser(source)

        # First pass: add L-command labels to the symbol table.
        rom = 0
        while parser.hasMoreCommands():
            if parser.commandType() == "L_COMMAND":
                print(parser.symbol())  # trace output kept from the original
                symbolTable.addEntry(parser.symbol(), rom)
            else:
                rom += 1
            parser.advance()

        parser.reset()

        # Second pass: translate instructions.
        while parser.hasMoreCommands():
            iType = parser.commandType()
            command = None
            print(parser.currentInstruction)  # trace output kept from the original

            if iType == 'C_COMMAND':
                dest = code.dest(parser.dest())
                comp = code.comp(parser.comp())
                jump = code.jump(parser.jump())
                if dest is None or comp is None or jump is None:
                    print("Error: invalid dest, comp, or jump")
                    return
                command = '111' + comp + dest + jump
            elif iType == 'A_COMMAND':
                symbol = parser.symbol()
                if symbol is None:
                    print("Error: invalid symbol declaration")
                    return
                if isInt(symbol):
                    # Numeric literal: convert directly.
                    command = decimalToBinary(symbol)
                else:
                    # Allocate an address for a symbol seen for the first time.
                    if not symbolTable.contains(symbol):
                        symbolTable.addEntry(symbol, symbolEntry)
                        symbolEntry += 1
                    command = decimalToBinary(symbolTable.getAddress(symbol))
            elif iType == 'L_COMMAND':
                # Labels were handled in the first pass.
                parser.advance()
                continue

            if command is None:
                print("Error: binary string longer than 16bits")
                return
            output.append(command)
            parser.advance()

    # Write the result next to the source, with a .hack extension.
    outputPath = os.path.splitext(path)[0] + '.hack'
    with open(outputPath, 'w') as outfile:
        for binary in output:
            outfile.write(binary + '\n')
def __init__(self):
    """Create the checker's root scope and fill the operator type table."""
    self.symbol_table = SymbolTable(None, "TypeChecker", {})

    # Three-level table indexed as ttypes[a][b][c]; missing entries at any
    # level are created on access and the innermost default is None.
    def _innermost():
        return defaultdict(lambda: None)

    def _middle():
        return defaultdict(_innermost)

    self.ttypes = defaultdict(_middle)
    self.fill_ttypes()
from SymbolTable import SymbolTable

# Manual smoke test for SymbolTable: print the table, insert entries, and
# print again so the effect of each insertion is visible on stdout.
SymbolT = SymbolTable()
SymbolT.print()  # table before any insertion

SymbolT.insert(id="var", tipo="int", attributes=None)
SymbolT.print()  # table after the first insertion

SymbolT.insert(id="a", tipo="double", attributes=None)
SymbolT.insert(id="function", tipo="void", attributes=None)
SymbolT.print()  # table after all three insertions

# Show what lookup returns for one of the inserted identifiers.
print(SymbolT.lookup(id="a"))
class TypeChecker(NodeVisitor):
    """AST visitor that reports type and scoping errors on stdout.

    The visitor relies on the following instance state, which is not
    initialised in the visible part of this class (NOTE(review): confirm
    where it is set up): ``symbol_table`` (current scope),
    ``current_function`` (``FunctionSymbol`` being checked or ``None``),
    ``current_type`` (type of the declaration being processed) and
    ``is_in_loop`` (whether a loop body is being visited).
    """

    def visit_BinExpr(self, node):
        """Return the result type of a binary expression, or None if invalid."""
        if isinstance(node.left, str):
            type1 = node.left
        else:
            type1 = self.visit(node.left)
        if isinstance(node.right, str):
            type2 = node.right
        else:
            type2 = self.visit(node.right)
        op = node.op
        result_type = ttype[op][type1][type2]
        if result_type is None:
            print("Wrong expression " + op + " in line: " + self.my_str(node.line))
        return result_type

    def visit_UnaryExpr(self, node):
        """Return the type of the wrapped expression."""
        return self.visit(node.expr)

    def visit_PrintInstruction(self, node):
        """Check the expressions being printed."""
        self.visit(node.expr_list)

    def visit_FunctionExpression(self, node):
        """Check a call against the function's declaration.

        Returns:
            The declared return type, or None when the name does not resolve
            to a ``FunctionSymbol``.
        """
        function_definition = self.symbol_table.getGlobal(node.name)
        if function_definition is None or not isinstance(function_definition, FunctionSymbol):
            print("function " + node.name + " is not defined" + " in line: " + self.my_str(node.line))
            # Nothing to type the call with; never dereference the missing
            # definition.
            return None

        if node.expr is not None:
            types = [self.visit(child) for child in node.expr.children]
            declared_types = function_definition.args
            if len(types) != len(declared_types):
                print("Wrong arguments in function " + node.name + " in line: " + self.my_str(node.line))
            else:
                for given_type, declared_type in zip(types, declared_types):
                    if given_type != declared_type:
                        print("Mismatching argument types Expected " + self.my_str(declared_type) + ", got " + self.my_str(given_type) + "in line: " + self.my_str(node.line))
        elif function_definition.args != []:
            print("Worng number of arguments in function: " + node.name + "in line: " + self.my_str(node.line))
        return function_definition.type

    def visit_Variable(self, node):
        """Return the declared type of a variable, or None if it is undefined."""
        dec = self.symbol_table.getGlobal(node.name)
        if dec is None:
            print("Undefined symbol: " + node.name + "in line: " + self.my_str(node.line))
            return None
        return dec.type

    def visit_WhileInstr(self, node):
        """Check a while loop; break/continue are legal inside its body."""
        # Restore the previous flag instead of forcing False, so leaving an
        # inner loop does not make the rest of an outer loop look loop-free.
        was_in_loop = getattr(self, 'is_in_loop', False)
        self.is_in_loop = True
        self.visit(node.condition)
        self.visit(node.instruction)
        self.is_in_loop = was_in_loop

    def visit_RepeatInstr(self, node):
        """Check a repeat loop; break/continue are legal inside its body."""
        was_in_loop = getattr(self, 'is_in_loop', False)
        self.is_in_loop = True
        self.visit(node.condition)
        self.visit(node.instructions)
        self.is_in_loop = was_in_loop

    def visit_Return_instr(self, node):
        """Check that a return is inside a function and has the declared type."""
        if self.current_function is None:
            print("Return placed outside of a function in line " + self.my_str(node.line))
        else:
            returned_type = self.visit(node.expression)
            if returned_type != self.current_function.type:
                print("Expected reutrn type " + self.my_str(self.current_function.type) + " actual" + self.my_str(returned_type) + "in line: " + self.my_str(node.line))

    def visit_Fundef(self, node):
        """Declare a function and check its arguments and body in a new scope."""
        if self.symbol_table.get(node.id):
            print("Function " + node.id + "already defined" + "in line: " + self.my_str(node.line))
        else:
            function = FunctionSymbol(node.id, node.type, SymbolTable(self.symbol_table, node.id))
            self.symbol_table.put(node.id, function)
            self.current_function = function
            self.symbol_table = self.current_function.symbol_table
            if node.arg_list is not None:
                self.visit(node.arg_list)
            self.visit(node.compound_instr)
            self.symbol_table = self.symbol_table.getParentScope()
            self.current_function = None

    def visit_Arg(self, node):
        """Declare a formal argument in the current function's scope."""
        if self.symbol_table.get(node.id) is not None:
            print("Double argument in function: " + node.id + "in line: " + self.my_str(node.line))
        else:
            self.symbol_table.put(node.id, VariableSymbol(node.id, node.type))
            self.current_function.put_arg(node.type)

    def visit_RelExpr(self, node):
        """Visit both operands of a relational expression.

        NOTE(review): the remainder of this method is elided in the source
        (marked ``# ...``); as visible, it returns None.
        """
        type1 = self.visit(node.left)
        type2 = self.visit(node.right)
        # ...

    def visit_Integer(self, node):
        """Type of an integer literal."""
        return 'int'

    def visit_Float(self, node):
        """Type of a float literal."""
        return 'float'

    def visit_String(self, node):
        """Type of a string literal."""
        return 'string'

    def visit_Init(self, node):
        """Check one initialiser of a declaration and declare the variable."""
        given_type = self.visit(node.expression)
        # An int may initialise a float; every other pair must match exactly.
        if given_type == self.current_type or (given_type == "int" and self.current_type == "float"):
            if self.symbol_table.get(node.id) is not None:
                print("The" + node.id + "was already defined" + "in line: " + self.my_str(node.line))
            else:
                self.symbol_table.put(node.id, VariableSymbol(node.id, self.current_type))
        else:
            print("Forbidden type assignment " + self.my_str(given_type) + " to " + self.my_str(self.current_type) + "in line: " + self.my_str(node.line))

    def visit_CompoundInstr(self, node):
        """Check a compound instruction inside its own ``"inner"`` scope."""
        self.symbol_table = SymbolTable(self.symbol_table, "inner")
        self.visit(node.declarations)
        self.visit(node.instructions_opt)
        self.symbol_table = self.symbol_table.getParentScope()

    def visit_ChoiceInstr(self, node):
        """Check an if statement and its optional else branch."""
        self.visit(node.condition)
        self.visit(node.instruction)
        if node.elseInstruction is not None:
            self.visit(node.elseInstruction)

    def visit_Assignment(self, node):
        """Check that the target is defined and the assigned type is compatible."""
        definition = self.symbol_table.getGlobal(node.id)
        given_type = self.visit(node.expression)
        if definition is None:
            print("Used undefined symbol " + node.id + "in line: " + self.my_str(node.line))
        elif given_type != definition.type and not (given_type == "int" and definition.type == "float"):
            # Same rule as visit_Init: only int -> float widening is allowed.
            # The original compared the symbol object itself to "int", which
            # let any type be assigned to a float variable.
            print("Bad assignment of " + self.my_str(given_type) + " to " + self.my_str(definition.type) + "in line: " + self.my_str(node.line))

    def visit_Block(self, node):
        """Check the wrapped block."""
        self.visit(node.block)

    def visit_Declaration(self, node):
        """Check every initialiser of a declaration against its declared type."""
        self.current_type = node.type
        self.visit(node.inits)
        self.current_type = ""

    def visit_ContinueInstr(self, node):
        """Report a continue that is not inside a loop."""
        if not getattr(self, 'is_in_loop', False):
            # The flag tracks loops, so the message names a loop.
            print("Continue instr used outside of loop")

    def visit_BreakInstr(self, node):
        """Report a break that is not inside a loop."""
        if not getattr(self, 'is_in_loop', False):
            print("Break instr used outside of loop")

    def visit_Program(self, node):
        """Check every top-level block of the program."""
        self.visit(node.blocks)

    def my_str(self, s):
        """Return ``str(s)``, rendering None as the text ``'None'``."""
        return 'None' if s is None else str(s)
class CompilationEngine():
    """Translate a tokenizer XML file (one token per line) into VM code.

    Each input line has the form ``<key> value </key>``.  The engine keeps
    the line under the cursor in ``current_line`` and emits VM commands
    through ``code_writer``.
    """

    OPERATORS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']

    def __init__(self, token_file, output_file):
        """Create a new compilation engine and compile the whole token file.

        Args:
            token_file: Path of the token XML file to read.
            output_file: Path of the VM file; an existing file is removed.
        """
        if os.path.exists(output_file):
            os.remove(output_file)

        self.input = open(token_file, 'r')
        self.output = open(output_file, 'a+')
        self.current_line = self.input.readline()

        self.symbol_table = None
        self.code_writer = VMWriter(output_file)
        self.label_counter = 0

        try:
            self._compile()
        finally:
            # The token file is fully consumed by now; release the handle.
            self.input.close()

    def _compile(self):
        """Compile every class until the closing ``</tokens>`` line."""
        # Skip the first line, which only identifies the token file.
        self.current_line = self.input.readline()
        # readline() returns '' at end of file; stop there instead of looping
        # forever when the closing tag is missing.
        while self.current_line and "</tokens>" not in self.current_line:
            self.compileClass()

    def _identify_key(self, line):
        """Return the tag name of a token line (text between '<' and '>')."""
        tag_end = line.find('>')
        return line[1:tag_end]

    def _identify_value(self, line):
        """Return the token text between the opening and closing tags."""
        first_tag_end = line.find('> ')
        last_tag_start = line.find(' </')
        return line[first_tag_end+2:last_tag_start]

    def _skipLine(self):
        """Advance the cursor to the next token line."""
        self.current_line = self.input.readline()

    def _generateLabel(self):
        """Return a fresh label of the form ``L<n>``."""
        label = "L{}".format(self.label_counter)
        self.label_counter += 1
        return label

    def compileClass(self):
        """Compile a complete class."""
        # Every class gets its own symbol table.
        self.symbol_table = SymbolTable()

        # Skip <keyword> class </keyword>.
        self._skipLine()
        # Record and skip the class name.
        name = self._identify_value(self.current_line)
        self._skipLine()
        # Skip the opening <symbol> { </symbol>.
        self._skipLine()

        self.compileClassVarDec()
        self.compileSubroutineDec(name)

        # Skip the closing <symbol> } </symbol>.
        self._skipLine()

    def compileClassVarDec(self):
        """Compile static and field declarations."""
        while self._identify_value(self.current_line) in ["var", "static", "field"]:
            # Record and skip the declaration kind.
            kind = self._identify_value(self.current_line)
            self._skipLine()
            # Record and skip the data type.
            var_type = self._identify_value(self.current_line)
            self._skipLine()

            # Consume names until the terminating ';'.
            while self._identify_value(self.current_line) != ';':
                if self._identify_key(self.current_line) != "symbol":
                    # Not a comma, so this is a variable name.
                    name = self._identify_value(self.current_line)
                    self._skipLine()
                    self.symbol_table.define(name, var_type, kind)
                else:
                    # Skip the comma.
                    self._skipLine()
            # Skip the ';'.
            self._skipLine()

    def compileSubroutineDec(self, class_name):
        """Compile every method, function or constructor of the class."""
        while self._identify_value(self.current_line) in [
            "method", "function", "constructor"
        ]:
            # Start a new subroutine scope.
            self.symbol_table.startSubroutine()

            # Skip the subroutine kind keyword.
            self._skipLine()
            # Skip the return type (it is not used).
            self._skipLine()
            # Record and skip the subroutine name.
            name = self._identify_value(self.current_line)
            self._skipLine()
            # Skip '('.
            self._skipLine()
            n_params = self.compileParameterList()
            # Skip ')'.
            self._skipLine()

            # NOTE(review): the VM ``function`` command normally takes the
            # number of local variables; the parameter count is passed here
            # as in the original - confirm against VMWriter.writeFunction.
            self.code_writer.writeFunction(
                "{}.{}".format(class_name, name), n_params
            )
            self.compileSubroutineBody()

    def compileParameterList(self):
        """Compile a (possibly empty) parameter list, excluding the "()".

        Returns:
            The number of parameters declared.
        """
        parameters_count = 0
        while self._identify_value(self.current_line) != ')':
            if self._identify_key(self.current_line) != "symbol":
                # Record and skip the parameter type.
                param_type = self._identify_value(self.current_line)
                self._skipLine()
                # Record and skip the parameter name.
                name = self._identify_value(self.current_line)
                self._skipLine()
                self.symbol_table.define(name, param_type, "argument")
                parameters_count += 1
            else:
                # Skip the comma.
                self._skipLine()
        return parameters_count

    def compileSubroutineBody(self):
        """Compile a subroutine body, including the enclosing braces."""
        # Skip '{'.
        self._skipLine()
        self.compileVarDec()
        self.compileStatements()
        # Skip '}'.
        self._skipLine()

    def compileVarDec(self):
        """Compile consecutive ``var`` declarations."""
        while self._identify_value(self.current_line) == "var":
            # Record and skip the 'var' keyword.
            kind = self._identify_value(self.current_line)
            self._skipLine()
            # Record and skip the variable type.
            var_type = self._identify_value(self.current_line)
            self._skipLine()

            # Consume names until the terminating ';'.
            while self._identify_value(self.current_line) != ';':
                if self._identify_key(self.current_line) != "symbol":
                    name = self._identify_value(self.current_line)
                    self._skipLine()
                    self.symbol_table.define(name, var_type, kind)
                else:
                    # Skip the comma.
                    self._skipLine()
            # Skip the ';'.
            self._skipLine()

    def compileStatements(self):
        """Compile a sequence of statements, excluding the enclosing "{}"."""
        keyword = self._identify_value(self.current_line)
        while keyword in ["let", "if", "while", "do", "return"]:
            if keyword == "let":
                self.compileLet()
            elif keyword == "if":
                self.compileIf()
            elif keyword == "while":
                self.compileWhile()
            elif keyword == "do":
                self.compileDo()
            elif keyword == "return":
                self.compileReturn()
            keyword = self._identify_value(self.current_line)

    def compileLet(self):
        """Compile a let statement, for a plain variable or an array entry."""
        # Skip 'let'.
        self._skipLine()
        # Record and skip the target name.
        name = self._identify_value(self.current_line)
        self._skipLine()

        is_array_entry = self._identify_value(self.current_line) == '['
        if is_array_entry:
            # Skip '[', compile the index, skip ']'.
            self._skipLine()
            self.compileExpression()
            self._skipLine()
            # Leave the element address (base + index) on the stack, the same
            # way compileTerm computes it for array reads.
            self.code_writer.writePush(
                self.symbol_table.kindOf(name), self.symbol_table.indexOf(name)
            )
            self.code_writer.writeArithmetic('+')

        # Skip '=', compile the value, skip ';'.
        self._skipLine()
        self.compileExpression()
        self._skipLine()

        if is_array_entry:
            # The original popped the value into the array variable itself,
            # overwriting the base address.  Park the value in temp 0, aim
            # 'that' at the element, then store the value there.
            self.code_writer.writePop('temp', 0)
            self.code_writer.writePop('pointer', 1)
            self.code_writer.writePush('temp', 0)
            self.code_writer.writePop('that', 0)
        else:
            kind = self.symbol_table.kindOf(name)
            index = self.symbol_table.indexOf(name)
            self.code_writer.writePop(kind, index)

    def compileIf(self):
        """Compile an if statement, possibly with a trailing else clause."""
        else_label = self._generateLabel()
        end_label = self._generateLabel()

        # Skip 'if' and '('.
        self._skipLine()
        self._skipLine()
        self.compileExpression()
        # Skip ')'.
        self._skipLine()

        # Jump to the else part when the condition is false.
        self.code_writer.writeArithmetic("~")
        self.code_writer.writeIf(else_label)

        # Skip '{' and compile the block.
        self._skipLine()
        while self._identify_value(self.current_line) != '}':
            self.compileStatements()
        # Skip '}'.
        self._skipLine()

        self.code_writer.writeGoto(end_label)
        self.code_writer.writeLabel(else_label)

        if self._identify_value(self.current_line) == "else":
            # Skip 'else' and '{'.
            self._skipLine()
            self._skipLine()
            while self._identify_value(self.current_line) != '}':
                self.compileStatements()
            # Skip '}'.
            self._skipLine()

        self.code_writer.writeLabel(end_label)

    def compileWhile(self):
        """Compile a while statement."""
        start_label = self._generateLabel()
        end_label = self._generateLabel()

        self.code_writer.writeLabel(start_label)

        # Skip 'while' and '('.
        self._skipLine()
        self._skipLine()
        self.compileExpression()

        # Leave the loop when the condition is false.
        self.code_writer.writeArithmetic("~")
        self.code_writer.writeIf(end_label)

        # Skip ')' and '{'.
        self._skipLine()
        self._skipLine()
        while self._identify_value(self.current_line) != '}':
            self.compileStatements()
        # Skip '}'.
        self._skipLine()

        self.code_writer.writeGoto(start_label)
        self.code_writer.writeLabel(end_label)

    def compileDo(self):
        """Compile a do statement."""
        # Skip 'do'.
        self._skipLine()

        # Concatenate tokens up to '(' to build the callee name.
        function = ""
        while self._identify_value(self.current_line) != '(':
            function += self._identify_value(self.current_line)
            self._skipLine()

        # Skip '('.
        self._skipLine()
        n_args = self.compileExpressionList()
        # Skip ')' and ';'.
        self._skipLine()
        self._skipLine()

        self.code_writer.writeCall(function, n_args)
        # A do statement ignores the call's result, so discard it.
        self.code_writer.writePop("temp", 0)

    def compileReturn(self):
        """Compile a return statement."""
        # Skip 'return'.
        self._skipLine()

        is_terminator = (
            self._identify_key(self.current_line) == "symbol"
            and self._identify_value(self.current_line) == ';'
        )
        if not is_terminator:
            # Testing only for "any symbol" treated ``return -x;`` and
            # ``return (x);`` as value-less returns.
            self.compileExpression()
        else:
            # A return value is always expected, so push 0 for a bare return.
            self.code_writer.writePush("constant", 0)

        # Skip ';'.
        self._skipLine()
        self.code_writer.writeReturn()

    def compileExpression(self):
        """Compile an expression: a term followed by any number of (op term)."""
        self.compileTerm()

        # ``while`` rather than ``if``: expressions such as ``a + b + c`` have
        # more than one operator.  Operators are applied left to right.
        operator = self._identify_value(self.current_line)
        while operator in self.OPERATORS:
            # Skip the operator and compile its right-hand term.
            self._skipLine()
            self.compileTerm()
            self.code_writer.writeArithmetic(operator)
            operator = self._identify_value(self.current_line)

    def compileTerm(self):
        """Compile a term.

        For an identifier, one token of look-ahead ("[" or ".") decides
        between an array entry, a subroutine call and a plain variable.  Any
        other following token is not part of the term and is left in place.
        """
        if self._identify_key(self.current_line) == "identifier":
            # Record and skip the identifier, then inspect what follows.
            name = self._identify_value(self.current_line)
            self._skipLine()

            if self._identify_value(self.current_line) == '.':
                # Subroutine call: name '.' subroutineName '(' args ')'.
                name += "."
                self._skipLine()
                name += self._identify_value(self.current_line)
                self._skipLine()
                # Skip '('.
                self._skipLine()
                n_args = self.compileExpressionList()
                # Skip ')'.
                self._skipLine()
                self.code_writer.writeCall(name, n_args)
            elif self._identify_value(self.current_line) == '[':
                # Array read: skip '[', compile the index, skip ']'.
                self._skipLine()
                self.compileExpression()
                self._skipLine()

                kind = self.symbol_table.kindOf(name)
                index = self.symbol_table.indexOf(name)
                # base + index -> pointer 1, then read through 'that'.
                self.code_writer.writePush(kind, index)
                self.code_writer.writeArithmetic('+')
                self.code_writer.writePop('pointer', 1)
                self.code_writer.writePush('that', 0)
            else:
                # Plain variable.
                kind = self.symbol_table.kindOf(name)
                index = self.symbol_table.indexOf(name)
                self.code_writer.writePush(kind, index)
        elif self._identify_value(self.current_line) == '(':
            # Parenthesised expression: skip '(', compile, skip ')'.
            self._skipLine()
            self.compileExpression()
            self._skipLine()
        elif self._identify_key(self.current_line) == "keyword":
            value = self._identify_value(self.current_line)
            if value == "true":
                # true is the bitwise negation of 0.
                self.code_writer.writePush("constant", 0)
                self.code_writer.writeArithmetic('~')
            elif value in ("false", "null"):
                # null previously pushed nothing, leaving the stack short.
                self.code_writer.writePush("constant", 0)
            elif value == "this":
                # The current object's base address lives in pointer 0.
                self.code_writer.writePush("pointer", 0)
            self._skipLine()
        elif self._identify_key(self.current_line) == "stringConstant":
            string = self._identify_value(self.current_line)
            # Allocate the string first (String.new, not appendChar), then
            # append one character at a time.
            self.code_writer.writePush("constant", len(string))
            self.code_writer.writeCall("String.new", 1)
            for char in string:
                self.code_writer.writePush("constant", ord(char))
                self.code_writer.writeCall("String.appendChar", 2)
            # Consume the token; the original left the cursor on it.
            self._skipLine()
        elif self._identify_key(self.current_line) == "integerConstant":
            num = self._identify_value(self.current_line)
            self.code_writer.writePush("constant", num)
            self._skipLine()
        elif self._identify_value(self.current_line) in ['-', '~']:
            # Unary operator: compile the operand, then apply the operator.
            op = self._identify_value(self.current_line)
            op = op if op == '~' else 'neg'
            self._skipLine()
            self.compileTerm()
            self.code_writer.writeArithmetic(op)

    def compileExpressionList(self):
        """Compile a (possibly empty) comma-separated list of expressions.

        Returns:
            The number of expressions compiled.
        """
        arguments_count = 0
        while self._identify_value(self.current_line) != ')':
            if self._identify_value(self.current_line) == ',':
                # Skip the comma.
                self._skipLine()
            else:
                self.compileExpression()
                arguments_count += 1
        return arguments_count
class DecafSemanticChecker(DecafVisitor):
    """Parse-tree visitor that prints Decaf semantic errors to stdout.

    Declarations are recorded in ``self.st`` (a scoped ``SymbolTable``) and
    uses are checked against it.  Errors are reported with ``print``; no
    exception is raised for a semantic error.
    """

    def __init__(self):
        super().__init__()
        self.st = SymbolTable()  # initialise an empty Symbol Table object

    def visitProgram(self, ctx: DecafParser.ProgramContext):
        """Visit the whole program inside a single symbol-table scope."""
        self.st.enterScope()  # enter symbol table scope
        self.visitChildren(ctx)
        self.st.exitScope()

    def visitVar_decl(self, ctx: DecafParser.Var_declContext):
        """Record each declared variable, reporting redeclarations.

        semantic rule: No identifier is declared twice in the same scope
        test with testdata/semantics/illegal-01.dcf
        """
        line_num = ctx.start.line
        for var_decl in ctx.ID():
            var_name = var_decl.getText()  # gets the variable name (eg. x)
            # search the current scope of the Symbol Table for the variable
            var_symbol = self.st.probe(var_name)
            if var_symbol is not None:
                # the variable DOES already exist in this scope
                print('Error on line', line_num, 'variable \'', var_name,
                      '\' already declared on line', var_symbol.line)
            else:
                var_symbol = VarSymbol(id=var_name, type='int',
                                       line=line_num, size=8, mem=STACK)
                # add var_symbol to the scope (st abbreviation of SymbolTable)
                self.st.addSymbol(var_symbol)
        return self.visitChildren(ctx)

    def visitStatement(self, ctx: DecafParser.StatementContext):
        """Report a statement whose location names an undeclared variable.

        semantic rule: No identifier is used before it is declared
        """
        if ctx.location() is not None:
            line_num = ctx.start.line
            var_name = ctx.location().ID().getText()
            if self.st.lookup(var_name) is None:
                print('Error on line', line_num, 'variable \'', var_name,
                      '\'is not declared')
        return self.visitChildren(ctx)

    # semantic rule: warn the user that any method defined after the main
    # method will never be executed.  (No check for this rule is implemented
    # in this class.)

    def visitField_name(self, ctx: DecafParser.Field_nameContext):
        """Report array fields declared with a size below 1.

        semantic rule: int_literal in an array declaration must be greater
        than 0
        """
        if ctx.int_literal() is not None:
            if int(ctx.int_literal().DECIMAL_LITERAL().getText()) < 1:
                line_num = ctx.start.line
                var_name = ctx.ID().getText()
                print("Error on line", line_num, "variable '", var_name,
                      "' array size must be greater than 0")
        return self.visitChildren(ctx)

    # semantic rule 5: number and types of arguments in a method call must be
    # the same as the number and types of the formals, i.e., the signatures
    # must be identical.
    def visitMethod_decl(self, ctx: DecafParser.Method_declContext):
        """Record a method symbol carrying the list of its parameter types."""
        method_name = ctx.ID()[0].getText()
        method_return_type = ctx.return_type().getText()
        line_num = ctx.start.line
        # data type of every formal, as strings
        method_params = [data_type.getText() for data_type in ctx.data_type()]
        method_symbol = MethodSymbol(id=method_name,
                                     type=method_return_type,
                                     line=line_num,
                                     params=method_params)
        # push method symbol with params list to the current scope
        self.st.addSymbol(method_symbol)
        return self.visitChildren(ctx)

    def visitMethod_call(self, ctx: DecafParser.Method_callContext):
        """Check a call's literal arguments against the method's formals.

        Returns ``None`` without visiting children when the argument count
        differs from the declared parameter count; otherwise returns the
        result of visiting the children.
        """
        signature_rule = (", the number and types of arguments in a method "
                          "call must be the same as the number and types of "
                          "the formals")
        line_num = ctx.start.line
        method_name = ctx.method_name().getText()

        # lookup method call name in symbol table
        method_symbol = self.st.lookup(method_name)
        if method_symbol is None:
            # Previously this fell through to ``method_symbol.params`` and
            # crashed with AttributeError on a call to an undeclared method.
            print('Error on line', line_num, 'method \'', method_name,
                  '\' is not declared')
            return self.visitChildren(ctx)

        method_symbol_params = method_symbol.params
        arguments = ctx.expr()
        if len(arguments) != len(method_symbol_params):
            print("Error you passed an incorrect combination of parameters",
                  "on line", line_num, signature_rule)
            return None

        # Lengths are equal here, so the pairs line up one-to-one.
        for expected_type, argument in zip(method_symbol_params, arguments):
            if expected_type not in ('int', 'boolean'):
                print("missing method_symbol_params with data type "
                      "classification:", expected_type, " on line number",
                      line_num, signature_rule)
                continue

            literal = argument.literal()
            if literal is None:
                # Not a literal argument: its type is not resolved by this
                # checker, so there is nothing to compare.
                continue

            if expected_type == 'int':
                matching = literal.int_literal()
            else:
                matching = literal.bool_literal()
            if matching is None:
                # Report the expected *parameter* type (the old message
                # printed the method's return type here).
                print("Error incorrect parameter data type expected",
                      expected_type, "received value", literal.getText(),
                      "on line", line_num, signature_rule)
        return self.visitChildren(ctx)
def test_symbol_table(self):
    """Exercise define / var_count / start_subroutine / index_of / type_of."""
    symbols = SymbolTable()

    # The same name is defined under two different kinds.
    for kind in (KIND_STATIC, KIND_ARGUMENT):
        symbols.define("x", "int", kind)
    for kind in (KIND_STATIC, KIND_ARGUMENT):
        self.assertEqual(symbols.var_count(kind), 1)

    symbols.define("y", "int", KIND_VAR)
    self.assertEqual(symbols.var_count(KIND_VAR), 1)

    # After start_subroutine the argument and var counts are expected to be
    # 0 while the static count is expected to stay at 1.
    symbols.start_subroutine()
    expected_counts = ((KIND_ARGUMENT, 0), (KIND_VAR, 0), (KIND_STATIC, 1))
    for kind, expected in expected_counts:
        self.assertEqual(symbols.var_count(kind), expected)

    symbols.define("x", "int", KIND_VAR)
    # 'y' was defined before start_subroutine; looking it up must now fail.
    with self.assertRaises(ValueError):
        symbols.type_of('y')

    for name in ("x2", "x3"):
        symbols.define(name, "int", KIND_VAR)
    for expected_index, name in enumerate(("x", "x2")):
        self.assertEqual(symbols.index_of(name), expected_index)

    # Defining 'x' again in the same subroutine must be rejected.
    with self.assertRaises(ValueError):
        symbols.define("x", "char", KIND_ARGUMENT)
"""
Assembler
Translates HACK assembly into HACK machine code.

Usage: the path of the assembly file is taken from ``sys.argv[1]``; the
output is written next to it with the extension replaced by ``.hack``.

@author: Kyle June
"""
import os
import sys

from Parser import Parser
import Code
from SymbolTable import SymbolTable

asmFilename = sys.argv[1]

# First pass: go through the file and add the address for each label to the
# symbol table.  Label pseudo-commands do not advance the ROM address; every
# other command does.
parser = Parser(asmFilename)
symbolTable = SymbolTable()
romAddress = 0
while parser.hasMoreCommands():
    parser.advance()
    if parser.commandType() == "L_COMMAND":
        symbolTable.addEntry(parser.symbol(), romAddress)
    else:
        romAddress += 1

# This opens the file that will be written to.
# splitext replaces whatever extension the input has; the previous
# ``asmFilename[:-3] + "hack"`` only worked for three-letter extensions.
hackFilename = os.path.splitext(asmFilename)[0] + ".hack"
hackFile = open(hackFilename, "w")

# This writes the translated code to the hack file.
parser.restart()
# presumably the first RAM address handed out to variables -- TODO confirm
ramAddress = 16
class DecafCodeGenVisitor(DecafVisitor):
    """Parse-tree visitor that builds x86-64 (AT&T syntax) assembly text.

    The output is accumulated in two strings: ``self.head`` (seeded with the
    ``.data`` line) and ``self.body`` (seeded with ``.global main``).
    Semantic problems found on the way are printed, not raised.
    """

    # Global variables, used to keep track of how many if statements,
    # callouts and loops exist in the code.
    IF_LABEL_COUNT = 1
    CALLOUT_COUNT = 1
    LOOP_COUNT = 1

    # Constructor sets up the header of the assembly code.
    def __init__(self):
        super().__init__()
        self.st = SymbolTable()
        self.head = '.data\n'
        self.body = '.global main\n'

    # Visits the program node, ensures there is a main method.
    def visitProgram(self, ctx: DecafParser.ProgramContext):
        """Generate code for the program and validate the ``main`` method."""
        self.st.enterScope()
        self.visitChildren(ctx)
        method_symbol = self.st.lookup('main')
        # Checks if main method has been declared and if it contains
        # paramaters.
        if method_symbol is None:
            print('[Error]: No main method has been declared.')
        else:
            # Read the parameters recorded on the symbol; the old code tested
            # a freshly created empty list, so this error could never fire.
            params = getattr(method_symbol, 'params', [])
            if len(params) != 0:
                print('[Error]: The main method cannot contain paramaters.')
        self.body += 'ret\n'
        self.st.exitScope()

    # Visits the method declaration node, checks if method is already
    # declared and manages parameters.
    def visitMethod_decl(self, ctx: DecafParser.Method_declContext):
        """Emit the method label and register the method and its parameters."""
        method_name = ctx.ID(0).getText()
        return_type = ctx.TYPE(0)
        line_number = ctx.start.line

        # Checks if the method has already been declared.
        if self.st.probe(method_name) is not None:
            # line_number is an int: it must be converted before
            # concatenation, otherwise this line raises TypeError.
            print('[Error]: The method ' + method_name + ' on line: '
                  + str(line_number) + ' was already declared!')
        else:
            self.body += method_name
            self.body += ':\n'
            params = []
            # ID(0) is the method name, so the parameter names are the
            # remaining IDs.  (The old guard ``len(params) > 1`` tested the
            # empty list and made this loop unreachable.)
            for param_node in ctx.ID()[1:]:
                param_name = param_node.getText()
                params.append(param_name)
                var_symbol = self.st.probe(param_name)
                if var_symbol is None:
                    var_symbol = VarSymbol(id=param_name,
                                           type='int',
                                           line=ctx.start.line,
                                           size=8,
                                           mem=self.st.stack_pointer)
                    self.st.addSymbol(var_symbol)
                var_addr = var_symbol.getAddr()
                self.body += '\tmovq %rax, -' + str(var_addr[0]) + '(%rsp)\n'
            method_symbol = MethodSymbol(id=method_name,
                                         type=return_type,
                                         line=line_number,
                                         params=params)
            self.st.addSymbol(method_symbol)
        return self.visitChildren(ctx)

    # Visits block node, enters a new scope inside the block.
    def visitBlock(self, ctx: DecafParser.BlockContext):
        """Visit a block inside its own symbol-table scope."""
        self.st.enterScope()
        visit = self.visitChildren(ctx)
        self.st.exitScope()
        return visit

    # Visits expression node, handles variable assignment.
    def visitExpr(self, ctx: DecafParser.ExprContext):
        """Emit code that leaves the value of the expression in ``%rax``."""
        # Expression is a variable.
        if ctx.location():
            var_name = ctx.location().getText()
            # For an indexed location ("a[i]") the symbol is stored under the
            # text before the first '['.
            var_symbol = self.st.lookup(var_name.split('[', 1)[0])
            if var_symbol is None:
                print('[Error]: Variable', var_name,
                      'has not been declared. Found on line', ctx.start.line)
            else:
                var_addr = var_symbol.getAddr()
                self.body += '\tmovq -' + str(var_addr[0]) + '(%rsp), %rax\n'
        # Expression is a literal (number or string/char)
        elif ctx.literal():
            number = ctx.literal().getText()
            if number == 'false':
                number = '0'
            if number == 'true':
                number = '1'
            self.body += '\tmovq $' + number + ', %rax\n'
        # Expression length is more than 1 (more expressions present such as
        # an operation)
        elif len(ctx.expr()) > 1:
            # Visit the first expression.
            self.visit(ctx.expr(0))
            # Move stack pointer 1 place and save value of first expression.
            self.st.stack_pointer[-1] += 8
            self.body += ('\tmovq %rax, ' + str(-self.st.stack_pointer[-1])
                          + '(%rsp)\n')
            # Visit the second expression.
            self.visit(ctx.expr(1))
            # %r10 <- first operand (reloaded), %r11 <- second operand.
            self.body += ('\tmovq ' + str(-self.st.stack_pointer[-1])
                          + '(%rsp), %r10\n')
            self.st.stack_pointer[-1] -= 8
            self.body += '\tmovq %rax, %r11\n'
            # If a binary operator is present, check the operator and add
            # appropriate code.  Every branch leaves its result in %r11.
            if ctx.BIN_OP():
                operator = str(ctx.BIN_OP())
                if operator == '+':
                    self.body += '\taddq %r10, %r11\n'
                elif operator == '*':
                    self.body += '\timul %r10, %r11\n'
                elif operator == '-':
                    # first - second: the old 'subq %r10, %r11' computed
                    # second - first.
                    self.body += '\tsubq %r11, %r10\n'
                    self.body += '\tmovq %r10, %r11\n'
                elif operator == '/':
                    # first / second.  Registers need the '%' prefix in AT&T
                    # syntax; cqto sign-extends %rax into %rdx:%rax as idiv
                    # requires; the quotient is copied to %r11 so the final
                    # move below no longer overwrites it.
                    self.body += '\tmovq %r10, %rax\n'
                    self.body += '\tcqto\n'
                    self.body += '\tidivq %r11\n'
                    self.body += '\tmovq %rax, %r11\n'
                self.body += '\tmovq %r11, %rax\n'

    # Visits the variable declaration node, handles storage of variables and
    # name checking.
    def visitVar_decl(self, ctx: DecafParser.Var_declContext):
        """Register every declared scalar variable and emit its store."""
        # Loops through all variables (to evaluate int x, y, z for example.)
        for id_node in ctx.ID():
            var_name = id_node.getText()
            var_symbol = self.st.probe(var_name)
            if "[" in var_name:
                # Names containing '[' are not handled by this method.
                continue
            if var_symbol is None:
                var_symbol = VarSymbol(id=var_name,
                                       type='int',
                                       line=ctx.start.line,
                                       size=8,
                                       mem=self.st.stack_pointer)
                self.st.addSymbol(var_symbol)
                var_addr = var_symbol.getAddr()
                self.body += '\tmovq %rax, -' + str(var_addr[0]) + '(%rsp)\n'
            else:
                print('[Error]:', var_symbol.id + ', declared on line',
                      ctx.start.line, 'has already been declared on line',
                      var_symbol.line)
        return self.visitChildren(ctx)

    # Visit the statement node, handles constructs such as IF statements and
    # FOR loops.
    def visitStatement(self, ctx: DecafParser.StatementContext):
        """Emit code for continue/break/if/return/for, then visit children.

        NOTE(review): several parts of the emitted text still look
        incomplete and were left untouched: continue/break jump to ``main``,
        the ``if`` code jumps to ``...l``/``...e``/``...g`` labels that are
        never defined, labels contain '-', and ``return``/``for`` use the
        expression text as an immediate -- confirm the intended output.
        """
        if ctx.CONTINUE() is not None:
            self.body += '\tjmp main\n'
        if ctx.BREAK() is not None:
            self.body += '\tjmp main\n'
        if ctx.IF():
            self.st.enterScope()
            if_label = 'if-label-' + str(self.IF_LABEL_COUNT)
            # AT&T operands are comma separated (the comma was missing).
            self.body += '\tcmp %r11, %r10\n'
            self.body += '\tjl ' + if_label + 'l\n'
            self.body += '\tje ' + if_label + 'e\n'
            self.body += '\tjg ' + if_label + 'g\n'
            self.body += '\tret\n'
            self.body += if_label + ':\n'
            self.IF_LABEL_COUNT = self.IF_LABEL_COUNT + 1
            self.st.exitScope()
        if ctx.RETURN():
            if ctx.expr():
                return_value = str(ctx.expr(0).getText())
                self.body += '\tmovq $' + return_value + ', %rax\n'
                self.body += '\tret\n'
            else:
                self.body += '\tret\n'
        if ctx.FOR():
            self.st.enterScope()
            loop_id = str(self.LOOP_COUNT)
            # Use the source text of the bound; str() of the context object
            # is not an operand.
            end_value = ctx.expr(1).getText()
            self.body += '\tmovq $1, %rbx\n'
            self.body += '\tjmp begin-for-' + loop_id + '\n'
            self.body += 'begin-for-' + loop_id + ':\n'
            self.body += '\tcmp $' + end_value + ', %rbx\n'
            self.body += '\tjge end-for-' + loop_id + '\n'
            visit = self.visitChildren(ctx)
            self.body += '\taddq $1, %rbx\n'
            self.body += '\tjmp begin-for-' + loop_id + '\n'
            self.body += 'end-for-' + loop_id + ':\n'
            self.body += '\tret\n'
            self.LOOP_COUNT = self.LOOP_COUNT + 1
            self.st.exitScope()
            # The children were already visited between the loop labels;
            # visiting them again would emit the loop body a second time.
            return visit
        return self.visitChildren(ctx)

    # Visit field declaration node, handles assignment of arrays.
    def visitField_decl(self, ctx: DecafParser.Field_declContext):
        """Register every declared field (scalar or array) and emit its store."""
        for field in ctx.field_name():
            # For an array declaration ("a[10]") the symbol is stored under
            # the text before '['.  Probe with that same name so that a
            # redeclared array is detected (the old code probed "a[10]").
            var_name = field.getText().split('[', 1)[0]
            var_symbol = self.st.probe(var_name)
            if var_symbol is None:
                var_symbol = VarSymbol(id=var_name,
                                       type='int',
                                       line=ctx.start.line,
                                       size=8,
                                       mem=self.st.stack_pointer)
                self.st.addSymbol(var_symbol)
                var_addr = var_symbol.getAddr()
                self.body += '\tmovq %rax, -' + str(var_addr[0]) + '(%rsp)\n'
            else:
                print('[Error]:', var_symbol.id + ', declared on line',
                      ctx.start.line, 'has already been declared on line',
                      var_symbol.line)
        return self.visitChildren(ctx)

    # Visit method call node, checks if method exists.
    def visitMethod_call(self, ctx: DecafParser.Method_callContext):
        """Emit a jump to a declared method, or report an unknown one."""
        name_ctx = ctx.method_name()
        # NOTE(review): assumes method_name() is absent (None) for calls
        # that are not plain method calls -- confirm against the grammar.
        if name_ctx is not None and not ctx.callout_arg():
            # Look up and emit the method's *text*; the old code used the
            # context object itself, which cannot be concatenated to a str.
            method_name = name_ctx.getText()
            method_symbol = self.st.lookup(method_name)
            if method_symbol is None:
                print('[Error]: Call to a function that does not exist: '
                      + method_name + ' on line: ' + str(ctx.start.line))
            else:
                self.body += '\tjmp ' + method_name + '\n'
        return self.visitChildren(ctx)

    # Visits callout arg node, handles adding strings to the head and
    # printing text.
    def visitCallout_arg(self, ctx: DecafParser.Callout_argContext):
        """Store a string argument in the head and emit a printf call."""
        string_literal = ctx.STRING_LITERAL()
        if string_literal is not None:
            label = 'string' + str(self.CALLOUT_COUNT)
            self.head += label + ': .asciz ' + str(string_literal) + '\n'
            # Pass the address of the string label; the old code loaded the
            # counter value itself into %rdi.
            self.body += '\tmovq $' + label + ', %rdi\n'
            self.body += '\tsubq $8, %rsp\n'
            self.body += '\tcall printf\n'
            self.body += '\taddq $8, %rsp\n'
            self.CALLOUT_COUNT = self.CALLOUT_COUNT + 1
        return self.visitChildren(ctx)
def __init__(self):
    """Create the symbol table and seed the two output text buffers."""
    super().__init__()
    self.st = SymbolTable()
    # head starts with the '.data' line, body with the '.global main' line
    self.head, self.body = '.data\n', '.global main\n'
class Parser:
    """
    Encapsulates access to the input code. Reads an assembly language
    command, parses it, and provides convenient access to the commands
    components (fields and symbols). In addition, removes all white space
    and comments.
    """

    # init -> line sectioning -> read_labels -> read_A / read_C
    def __init__(self, file_name: str):
        """
        Opens the input file/stream and gets ready to parse it.
        :param file_name: file name to open
        """
        # Read everything up front and close the handle right away; the
        # (now closed) file object is still exposed as ``self.file``.
        with open(file_name) as source:
            self.file = source
            self.lines = source.readlines()
        self.clean_comments()
        self.current_line_index = 0
        # None when nothing is left after comment removal (the old code
        # raised IndexError here for such a file).
        self.current_line = self.lines[0] if self.lines else None
        self.line_amount = len(self.lines)
        self.binary_lines = list()
        self.symbol_table = SymbolTable()
        self.labels = dict()
        self.next_address = FIRST_ADDRESS

    def clean_comments(self):
        """
        method to clean the comments from the code lines: drops blank lines
        and lines that start with "//", "/*" or "*/".
        :return: none
        """
        # The previous expression mixed ``not`` with ``&``; because ``&``
        # binds tighter than ``not`` it kept lines starting with "*/".
        comment_prefixes = ("//", "/*", "*/")
        self.lines = [line for line in self.lines
                      if not line.isspace()
                      and not line.startswith(comment_prefixes)]

    def parse(self):
        """
        method to parse out the file
        :return: the list of binary lines produced for the instructions
        """
        self.read_labels()
        self.read_instructions()
        return self.binary_lines

    def strip_line(self):
        """
        method to strip surrounding whitespace, a trailing "//" comment and
        all spaces from the current line
        :return: none
        """
        stripped = self.current_line.strip()
        without_comment = stripped.split("//")[0]
        self.current_line = without_comment.replace(" ", "")

    def line_sectioning(self):
        """
        method to normalise every line of the file in place, by stripping
        it and storing it back into ``self.lines``
        :return: none
        """
        while self.current_line is not None:
            # stripping from whitespaces and end line
            self.strip_line()
            self.lines[self.current_line_index] = self.current_line
            self.advance()

    def has_more_commands(self):
        """
        Are there more commands in the input?
        :return: true if has, false otherwise
        """
        return self.current_line_index + 1 < self.line_amount

    def advance(self):
        """
        Reads the next command from the input and makes it the current
        command. Should be called only if hasMoreCommands() is true.
        Sets the current line to None when no lines are left.
        :return: none
        """
        if not self.has_more_commands():
            self.current_line = None
            return
        while self.has_more_commands():
            self.current_line_index += 1
            self.current_line = self.lines[self.current_line_index]
            # stop on the first line that should not be ignored
            if not self.check_ignored_line():
                break

    def check_ignored_line(self):
        """
        check if a line should be ignored
        :return: true for ignored, false otherwise
        """
        return (self.current_line.isspace()
                or self.current_line.startswith(COMMENT))

    def get_address_for_symbol(self):
        """
        get next free address for a symbol in symbol table
        :return: int of address
        """
        address = self.next_address
        # skip addresses the symbol table reports as already occupied
        while self.symbol_table.is_occupied(address):
            address = address + 1
        self.next_address = address + 1
        return address

    def read_labels(self):
        """
        read all labels inside the asm file and record, for each one, the
        binary form of the index of the instruction that follows it
        :return: none
        """
        index = 0
        for line in self.lines:
            if Parser.line_command_type(line) == CommandType.L_COMMAND:
                # remove "(" and ")" from beginning and end.
                label_name = line[1:len(line) - 1]
                # the binary code of the label is its address
                self.labels[label_name] = Parser.decimal_to_binary(index)
            else:
                # only non-label lines advance the instruction index
                index = index + 1

    @staticmethod
    def from_array_to_string(binary_string):
        """
        method to turn from an array of {0,1} into a string of {1,0}
        :param binary_string: binary array string
        :return: binary string
        """
        return "".join(binary_string)

    def read_instructions(self):
        """
        method to read instructions of asm file and append the binary code
        of every A and C command to ``self.binary_lines``
        :return: none
        """
        for instruction in self.lines:
            command = Parser.line_command_type(instruction)
            if command == CommandType.A_COMMAND:
                binary = self.read_A_instruction(instruction)
            elif command == CommandType.C_COMMAND:
                binary = self.read_C_instruction(instruction)
            else:
                # labels produce no output line
                continue
            self.binary_lines.append(Parser.from_array_to_string(binary))

    def read_A_instruction(self, line: str):
        """
        read A instruction inside the asm file and update symbol table
        accordingly
        :return: binary code of line
        """
        symbol = line[1:]
        if line[1].isdecimal():
            binary = Parser.decimal_to_binary(int(symbol))
        elif self.symbol_table.contains(symbol):
            binary = Parser.decimal_to_binary(
                self.symbol_table.get_address(symbol))
        elif symbol in self.labels:
            binary = self.labels.get(symbol)
        else:
            # first use of a new symbol: give it the next free address
            address = self.get_address_for_symbol()
            self.symbol_table.add_entry(symbol, address)
            binary = Parser.decimal_to_binary(address)
        return Parser.from_array_to_string(binary)

    @staticmethod
    def read_C_instruction(line: str):
        """
        read C instruction inside the asm file
        :return: binary code of line
        """
        dest, comp, jump = Parser.parse_C_command(line)
        # COMP, DESTINATIONS, JUMPS are dictionaries of fitting given commands
        return COMP[comp] + DESTINATIONS[dest] + JUMPS[jump]

    def read_L_instruction(self, label_name: str):
        """
        read L instruction inside the asm file
        :return: binary code of line
        """
        address = self.symbol_table.get_address(label_name)
        binary = Parser.decimal_to_binary(address)
        return Parser.from_array_to_string(binary)

    @staticmethod
    def decimal_to_binary(address):
        """
        turn an int into a list of SIXTEEN '0'/'1' characters, left padded
        with '0'
        :param address: non-negative int to convert
        :return: list of single-character strings
        """
        # because A instruction, first bit is 0->(15 bits we will display)
        binary = ['0'] * SIXTEEN
        # turns address into bin representation, for int.
        binary_address = (bin(address))[2:]
        from_index = SIXTEEN - len(binary_address)
        # slice assignment of a str writes one character per slot
        binary[from_index:SIXTEEN] = binary_address
        return binary

    def command_type(self):
        """
        Returns the type of the current command:
        - A_COMMAND for @Xxx where Xxx is either a symbol or a decimal number
        - C_COMMAND for dest=comp;jump
        - L_COMMAND (actually, pseudo- command) for (Xxx) where Xxx is a
          symbol.
        :return: enum of command type
        """
        if self.current_line.startswith(LABEL_START):
            return CommandType.L_COMMAND
        elif self.current_line.startswith(A_COMMAND_START):
            return CommandType.A_COMMAND
        else:
            # already deleted comments, so no need to check for that
            return CommandType.C_COMMAND

    @staticmethod
    def line_command_type(line: str):
        """
        Returns the type of the current command, out of the line itself.
        - A_COMMAND for @Xxx where Xxx is either a symbol or a decimal number
        - C_COMMAND for dest=comp;jump
        - L_COMMAND (actually, pseudo- command) for (Xxx) where Xxx is a
          symbol.
        :return: enum of command type
        """
        if line.startswith(A_COMMAND_START):
            return CommandType.A_COMMAND
        elif line.startswith(LABEL_START):
            return CommandType.L_COMMAND
        else:
            return CommandType.C_COMMAND

    @staticmethod
    def parse_C_command(line):
        """
        parsing a c command method, into 3 sections- dest, comp and jmp.
        :param line: given line to parse into a c command
        :return: a triplet of (dest, comp, jump); a missing part is None
        """
        # line looks like dest=comp;jmp
        # splitting on ';' gives [dest=comp], [jmp]
        # splitting the first part on '=' gives [dest], [comp]
        split_by_comma = line.split(';')
        split_by_equal = split_by_comma[0].split('=')
        jump = split_by_comma[1] if len(split_by_comma) > 1 else None
        if len(split_by_equal) == 1:
            # no dest (a comp-only command used to raise IndexError)
            return None, split_by_equal[0], jump
        return split_by_equal[0], split_by_equal[1], jump
class CompilationEngine: def __init__(self, tokenizer: JackTokenizer, jack_file): self.tokenizer = tokenizer self.class_name = '' log_file_name = jack_file.name.replace('.jack', '_engine.xml') self.log_file = open(log_file_name, 'w') log_file_name = jack_file.name.replace('.jack', '.vm') self.output_file = open(log_file_name, 'w') self.symbol_table = SymbolTable() self.vm_writer = VMWriter(self.output_file) self.while_label_index = 0 self.if_label_index = 0 def compile(self): self.compile_class(0) def advance(self): """return current token""" return self.tokenizer.advance() def next(self) -> Token: return self.tokenizer.next() def compile_token(self, token, indentation, limits=None): print(token.content, end=' ') if limits is not None: if isinstance(limits, list) and token.token_type not in limits: raise RuntimeError(token, 'can be only', limits) if isinstance(limits, str) and token.content != limits: raise RuntimeError(token, 'can be only', limits) self.log(token, indentation) def log_node(self, msg, indentation): space = '' for i in range(0, indentation): space += ' ' self.log_file.write('{1}<{0}>\n'.format(msg, space)) def log(self, token, indentation): txt = token.content if txt == '<': txt = '<' elif txt == '>': txt = '>' elif txt == '\"': txt = '"' elif txt == '&': txt = '&' space = '' for i in range(0, indentation): space += ' ' # 2 spaces self.log_file.write('{2}<{0}> {1} </{0}>\n'.format( token.token_type, txt, space)) def compile_class(self, indentation): """ Compiles a complete class. 
""" self.log_file.write('<class>\n') # 'class' advance = self.advance() self.compile_token(advance, indentation + 1) # class name advance = self.advance() self.class_name = advance.content self.compile_token(advance, indentation + 1) # set class name to vm-writer self.vm_writer.set_class_name(advance.content) # { advance = self.advance() self.compile_token(advance, indentation + 1, "{") # classVarDec* subroutineDec* advance = self.advance() while advance.content != '}': if (advance.content == 'constructor' or advance.content == 'function' or advance.content == 'method'): self.compile_subroutine(advance, indentation + 1) elif advance.content in ['field', 'static']: self.compile_class_var_dec(advance, indentation + 1) elif advance.content != '}': raise RuntimeError( advance, 'Only subroutine and variable can be declared here') advance = self.advance() # } self.compile_token(advance, indentation + 1, '}') self.log_file.write('</class>\n') self.log_file.flush() print("\ncompilation success") return def compile_class_var_dec(self, token, indentation): """ passing token as an argument, because the caller has already called the advance function once Compiles a static declaration or a field declaration. 
""" self.log_node('classVarDec', indentation) # static or field kind = token.content.upper() self.compile_token(token, indentation + 1) token = self.advance() var_type = token.content self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) # var name token = self.advance() var_name = token.content self.compile_token(token, indentation + 1, [IDENTIFIER]) self.symbol_table.define(var_name, var_type, kind) token = self.advance() while token.content == ',': self.compile_token(token, indentation + 1, ',') token = self.advance() var_name = token.content self.symbol_table.define(var_name, var_type, kind) self.compile_token(token, indentation + 1, [IDENTIFIER]) token = self.advance() # ; self.compile_token(token, indentation + 1, ';') self.log_node('/classVarDec', indentation) return def compile_subroutine(self, token, indentation): """ Compiles a complete method, function, or constructor. """ # reset symbol table for subroutine self.symbol_table.start_subroutine() self.log_node('subroutineDec', indentation) # function/method/constructor function_type = token.content self.compile_token(token, indentation + 1) # void | type token = self.advance() self.compile_token(token, indentation + 1) # subroutine name token = self.advance() subroutine_name = token.content self.compile_token(token, indentation + 1) # ( token = self.advance() self.compile_token(token, indentation + 1) # parameter list exists if function_type == 'method': self.symbol_table.define('this_placeholder', "THIS", ARG) pass token = self.advance() self.compile_parameter_list(token, indentation + 1) if token.content != ')': token = self.advance() # ) self.compile_token(token, indentation + 1, ')') # { token = self.advance() self.compile_subroutine_body(token, indentation + 1, subroutine_name, function_type) self.log_node('/subroutineDec', indentation) return def compile_subroutine_body(self, token, indentation, subroutine_name, function_type='function'): self.log_node('subroutineBody', indentation) 
self.compile_token(token, indentation + 1, '{') token = self.advance() n_locals = 0 if token.content == 'var': n_locals = self.compile_var_dec(token, indentation + 1) token = self.advance() self.vm_writer.write_functions(subroutine_name, n_locals) # todo 处理constructor if function_type == 'constructor': # number of fields self.vm_writer.write_push('CONST', self.symbol_table.var_count(FIELD)) self.vm_writer.write_call('Memory.alloc', 1) self.vm_writer.write_pop('POINTER', 0, 'set this pointer') elif function_type == 'method': # if it is a method, always set arg 0 to pointer 0(this) self.vm_writer.write_push(ARG, 0) self.vm_writer.write_pop('POINTER', 0) pass # if this token is '}' means the function has an empty body if token.content == '}': # TODO 空函数体的处理 # empty body print('empty body', token) pass else: self.compile_statements(token, indentation + 1) token = self.advance() self.compile_token(token, indentation + 1, '}') self.log_node('/subroutineBody', indentation) def compile_parameter_list(self, token, indentation): """Compiles a (possibly empty) parameter list, not including the enclosing ‘‘ () ’’.""" self.log_node('parameterList', indentation) while token.content != ')': param_symbol = Symbol() param_symbol.kind = ARG # parameter type self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) param_symbol.symbol_type = token.content # parameter name token = self.advance() self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) param_symbol.name = token.content self.symbol_table.define_symbol(param_symbol) if self.next() is not None and self.next().content == ',': # compile , token = self.advance() self.compile_token(token, indentation + 1) token = self.advance() continue elif self.next() is not None and self.next().content == ')': # this function does not consumes ')' so didn't call advance() break else: token = self.advance() self.log_node('/parameterList', indentation) return def compile_var_dec(self, token, indentation) -> int: """ Compiles 
a var declaration.""" # var_symbol = Symbol() # # var # self.compile_token(token, indentation + 1, 'var') # var_symbol.kind = VAR # # var type # token = self.advance() # self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) # var_symbol.symbol_type = token.content # # var name # token = self.advance() # self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) # var_symbol.name = token.content # # , or ; # token = self.advance() # while token.content != ';': # self.compile_token(token, indentation + 1, ',') # token = self.advance() # self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) # token = self.advance() # self.compile_token(token, indentation + 1, ';') var_count = 0 while token.content == 'var': self.log_node('varDec', indentation) var_count += 1 var_symbol = Symbol() # var self.compile_token(token, indentation + 1, 'var') var_symbol.kind = VAR # var type token = self.advance() self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) var_symbol.symbol_type = token.content # var name token = self.advance() self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) var_symbol.name = token.content self.symbol_table.define_symbol(var_symbol) # next token may be ',' or ';' token = self.advance() # if next token is ',' while token.content == ',': var_count += 1 self.compile_token(token, indentation + 1, ',') # var name token = self.advance() self.compile_token(token, indentation + 1, [IDENTIFIER]) # only name differs, types are the same self.symbol_table.define(token.content, var_symbol.symbol_type, VAR) token = self.advance() if token.content == ';': self.compile_token(token, indentation + 1, ';') if self.next().content == 'var': token = self.advance() self.log_node('/varDec', indentation) return var_count def compile_statements(self, token, indentation): """Compiles a sequence of statements, not including the enclosing ‘‘{}’’.""" self.log_node('statements', indentation) while token.content != '}': if token.content 
== 'let': self.compile_let(token, indentation + 1) pass elif token.content == 'if': self.compile_if(token, indentation + 1) pass elif token.content == 'while': self.compile_while(token, indentation + 1) pass elif token.content == 'do': self.compile_do(token, indentation + 1) pass elif token.content == 'return': self.compile_return(token, indentation + 1) pass else: raise RuntimeError('unknown type in statements %s') if self.next() is not None and self.next().content == '}': break else: token = self.advance() self.log_node('/statements', indentation) return def compile_do(self, token: Token, indentation): self.log_node('doStatement', indentation) self.compile_token(token, indentation + 1, 'do') token = self.advance() self.compile_term(token, indentation + 1, do_term=True) self.vm_writer.write_pop('TEMP', 0, 'do call') token = self.advance() self.compile_token(token, indentation + 1, ';') # maybe a local subroutine or someone else's # token = self.advance() # self.compile_token(token, indentation + 1, [IDENTIFIER]) # function_class_name = token.content # token = self.advance() # if token.content == '.': # # someone else 's # self.compile_token(token, indentation + 1, '.') # token = self.advance() # self.compile_token(token, indentation + 1, [IDENTIFIER]) # function_name = token.content # token = self.advance() # self.compile_token(token, indentation + 1, '(') # token = self.advance() # n_arg = self.compile_expression_list(token, indentation + 1) # self.vm_writer.write_call(function_class_name + '.' 
+ function_name, n_arg) # # do calls must 'pop temp 0', because void functions always returns 0 # self.vm_writer.write_pop('TEMP', 0, 'do call') # if token.content != ')': # token = self.advance() # self.compile_token(token, indentation + 1, ')') # pass # else: # self.compile_token(token, indentation + 1, '(') # token = self.advance() # self.compile_expression_list(token, indentation + 1) # if token.content != ')': # token = self.advance() # self.compile_token(token, indentation + 1, ')') # # local method # pass # token = self.advance() # self.compile_token(token, indentation + 1, ';') self.log_node('/doStatement', indentation) return def compile_let(self, token: Token, indentation): """let length = Keyboard.readInt("HOW MANY NUMBERS? ");""" self.log_node('letStatement', indentation) # let self.compile_token(token, indentation + 1, 'let') # length token = self.advance() self.compile_token(token, indentation + 1, [IDENTIFIER]) var_name = token.content # = or [ token = self.advance() array = False if token.content == '[': array = True self.compile_token(token, indentation + 1, '[') token = self.advance() # e.g x[y] # push y to stack self.compile_expression(token, indentation + 1) token = self.advance() self.compile_token(token, indentation + 1, ']') token = self.advance() # push x to stack self.write_push(var_name) # add x and y self.vm_writer.write_arithmetic('ADD') # # pop the result to THAT # self.vm_writer.write_pop('POINTER', 1) self.vm_writer.write_pop('TEMP', 2) pass self.compile_token(token, indentation + 1, '=') # expression token = self.advance() self.compile_expression(token, indentation + 1) if array: self.vm_writer.write_push('TEMP', 2) self.vm_writer.write_pop('POINTER', 1) self.vm_writer.write_pop('THAT', 0) pass else: if self.symbol_table.kind_of(var_name) == VAR: self.vm_writer.write_pop('LOCAL', self.symbol_table.index_of(var_name), var_name) pass elif self.symbol_table.kind_of(var_name) == ARG: self.vm_writer.write_pop('ARG', 
self.symbol_table.index_of(var_name), var_name) pass elif self.symbol_table.kind_of(var_name) == FIELD: self.vm_writer.write_pop('THIS', self.symbol_table.index_of(var_name), var_name) pass elif self.symbol_table.kind_of(var_name) == STATIC: self.vm_writer.write_pop('STATIC', self.symbol_table.index_of(var_name), var_name) pass # ; token = self.advance() self.compile_token(token, indentation + 1, ';') self.log_node('/letStatement', indentation) return def write_push(self, var_name): if self.symbol_table.kind_of(var_name) == VAR: self.vm_writer.write_push('LOCAL', self.symbol_table.index_of(var_name), var_name) pass elif self.symbol_table.kind_of(var_name) == ARG: self.vm_writer.write_push('ARG', self.symbol_table.index_of(var_name), var_name) pass elif self.symbol_table.kind_of(var_name) == FIELD: self.vm_writer.write_push('THIS', self.symbol_table.index_of(var_name), var_name) pass def compile_while(self, token: Token, indentation): while_label_pre = 'WHILE_%s' % self.while_label_index # label index++ self.while_label_index += 1 self.vm_writer.write_label('%s_EXP' % while_label_pre) self.log_node('whileStatement', indentation) self.compile_token(token, indentation + 1, 'while') token = self.advance() self.compile_token(token, indentation + 1, '(') token = self.advance() self.vm_writer.write_comment("calculating while condition expression") # expression self.compile_expression(token, indentation + 1) # ) token = self.advance() self.compile_token(token, indentation + 1, ')') self.vm_writer.write_arithmetic('NOT') # checking condition expression self.vm_writer.write_if('%s_END' % while_label_pre) # { token = self.advance() self.compile_token(token, indentation + 1, '{') # statements token = self.advance() if token.content != '}': # not empty statement self.compile_statements(token, indentation + 1) # } token = self.advance() self.compile_token(token, indentation + 1, '}') self.vm_writer.write_goto('%s_EXP' % while_label_pre) self.vm_writer.write_label('%s_END' % 
while_label_pre) self.log_node('/whileStatement', indentation) return def compile_return(self, token: Token, indentation): self.log_node('returnStatement', indentation) self.compile_token(token, indentation + 1, 'return') token = self.advance() if token.content != ';': self.compile_expression(token, indentation + 1) token = self.advance() self.vm_writer.write_return() else: # for functions that return void, it must return an integer 0 self.vm_writer.write_return(True) pass self.compile_token(token, indentation + 1, ';') self.log_node('/returnStatement', indentation) return def compile_if(self, token: Token, indentation): # if_label_pre = 'IF_%s' % self.if_label_index else_label = 'ELSE_%s' % self.if_label_index finish_label = 'FINISH_%s' % self.if_label_index # label index++ self.if_label_index += 1 self.log_node('ifStatement', indentation) self.compile_token(token, indentation + 1, 'if') token = self.advance() self.compile_token(token, indentation + 1, '(') self.vm_writer.write_comment("calculating if condition expression") token = self.advance() # expression self.compile_expression(token, indentation + 1) # ) token = self.advance() self.compile_token(token, indentation + 1, ')') self.vm_writer.write_arithmetic('NOT') self.vm_writer.write_if(else_label) # { token = self.advance() self.compile_token(token, indentation + 1, '{') # statements token = self.advance() if token.content != '}': # not empty statement self.compile_statements(token, indentation + 1) # } token = self.advance() self.compile_token(token, indentation + 1, '}') if self.next().content == 'else': """ if statements... (else vm code) goto FINISH // if statements finished, pass the else code lable ELSE else statements... 
label FINISH """ self.vm_writer.write_goto(finish_label) self.vm_writer.write_label(else_label) token = self.advance() self.compile_token(token, indentation + 1, 'else') token = self.advance() self.compile_token(token, indentation + 1, '{') token = self.advance() self.compile_statements(token, indentation + 1) token = self.advance() self.compile_token(token, indentation + 1, '}') self.vm_writer.write_label(finish_label) else: """ if statements... (no else vm code) label ELSE """ self.vm_writer.write_label(else_label) pass self.log_node('/ifStatement', indentation) return def compile_expression(self, token, indentation): self.log_node('expression', indentation) self.compile_term(token, indentation + 1) while self.next() is not None and self.next( ).content in OP_SYMBOLS.keys(): token = self.advance() self.compile_token(token, indentation + 1, [SYMBOL]) op_symbol = OP_SYMBOLS[token.content] token = self.advance() self.compile_term(token, indentation + 1) # call op function after pushes the second parameter self.vm_writer.write_arithmetic(op_symbol) self.log_node('/expression', indentation) return def compile_term(self, token: Token, indentation, do_term=False): if not do_term: self.log_node('term', indentation) if token.token_type == INT_CONST: self.compile_token(token, indentation + 1, [INT_CONST]) # todo self.vm_writer.write_push('CONST', token.content) pass elif token.token_type == STRING_CONST: """ // construct a string push constant 3 call String.new 1 // the address of string is now on the stack push constant 72 call String.appendChar 2 push constant 73 call String.appendChar 2 push constant 74 call String.appendChar 2 // string construct complete its still on sp """ length = len(token.content) self.vm_writer.write_push('CONST', length) self.vm_writer.write_call('String.new', 1) for c in token.content: self.vm_writer.write_push('CONST', ord(c)) self.vm_writer.write_call('String.appendChar', 2) pass self.compile_token(token, indentation + 1) # keyword constant 
elif token.content == 'true': self.compile_token(token, indentation + 1) self.vm_writer.write_push('CONST', 1) self.vm_writer.write_arithmetic('NEG') pass elif token.content == 'false': self.compile_token(token, indentation + 1) self.vm_writer.write_push('CONST', 0) pass elif token.content == 'this': self.compile_token(token, indentation + 1) self.vm_writer.write_push('POINTER', 0) pass elif token.content == 'null': self.compile_token(token, indentation + 1) self.vm_writer.write_push('CONST', 0) pass elif token.content in ['true', 'false', 'null', 'this']: self.compile_token(token, indentation + 1) self.vm_writer.write_push('POINTER', 0) # self.vm_writer.write_comment('%s not implemented' % token.content) pass elif self.next().content == '[': self.compile_token(token, indentation + 1, [IDENTIFIER]) self.write_push(token.content) token = self.advance() self.compile_token(token, indentation + 1, '[') token = self.advance() self.compile_expression(token, indentation + 1) token = self.advance() self.compile_token(token, indentation + 1, ']') self.vm_writer.write_arithmetic('ADD') self.vm_writer.write_pop('POINTER', 1) self.vm_writer.write_push('THAT', 0) pass elif token.content == '(': self.compile_token(token, indentation + 1, '(') token = self.advance() self.compile_expression(token, indentation + 1) if token.content != ')': token = self.advance() self.compile_token(token, indentation + 1, ')') pass elif token.content in UNARY_OP_SYMBOL.keys(): self.compile_token(token, indentation + 1) unary_op = UNARY_OP_SYMBOL[token.content] token = self.advance() self.compile_term(token, indentation + 1) self.vm_writer.write_arithmetic(unary_op) # elif self.next().content == ';': # # varname # self.compile_token(token, indentation + 1) # pass elif self.next().content == '(': # method call n_arg = 1 self.vm_writer.write_push('POINTER', 0) # self.vm_writer.write_pop(ARG, 0) function_class_name = self.class_name function_name = token.content self.compile_token(token, indentation + 
1, [IDENTIFIER]) token = self.advance() self.compile_token(token, indentation + 1, '(') token = self.advance() n_arg += self.compile_expression_list(token, indentation + 1) self.vm_writer.write_call( function_class_name + '.' + function_name, n_arg) if token.content != ')': token = self.advance() self.compile_token(token, indentation + 1, ')') pass elif self.next().content == '.': # static function call # class name n_arg = 0 function_class_name = token.content if self.symbol_table.index_of(function_class_name) > -1: n_arg += 1 self.vm_writer.write_push( self.symbol_table.kind_of(function_class_name), self.symbol_table.index_of(function_class_name), function_class_name) function_class_name = self.symbol_table.type_of( function_class_name) self.compile_token(token, indentation + 1, [IDENTIFIER]) token = self.advance() self.compile_token(token, indentation + 1, '.') # function name token = self.advance() function_name = token.content self.compile_token(token, indentation + 1, [IDENTIFIER]) token = self.advance() self.compile_token(token, indentation + 1, '(') token = self.advance() n_arg += self.compile_expression_list(token, indentation + 1) self.vm_writer.write_call( function_class_name + '.' 
+ function_name, n_arg) if token.content != ')': token = self.advance() self.compile_token(token, indentation + 1, ')') pass elif token.token_type == IDENTIFIER: # varName self.compile_token(token, indentation + 1, [IDENTIFIER]) # todo 处理不同情形 if self.symbol_table.kind_of(token.content) == VAR: self.vm_writer.write_push( 'LOCAL', self.symbol_table.index_of(token.content), token.content) elif self.symbol_table.kind_of(token.content) == ARG: self.vm_writer.write_push( 'ARG', self.symbol_table.index_of(token.content), token.content) pass elif self.symbol_table.kind_of(token.content) == FIELD: self.vm_writer.write_push( 'FIELD', self.symbol_table.index_of(token.content), token.content) pass elif self.symbol_table.kind_of(token.content) == STATIC: self.vm_writer.write_push( 'STATIC', self.symbol_table.index_of(token.content), token.content) pass pass else: raise RuntimeError("Uncaught situation", token) if not do_term: self.log_node('/term', indentation) return def compile_expression_list(self, token: Token, indentation) -> int: self.log_node('expressionList', indentation) n_expression = 0 while token.content != ')': n_expression += 1 self.compile_expression(token, indentation + 1) if self.next() is not None and self.next().content == ',': # multiple expression list token = self.advance() self.compile_token(token, indentation + 1, ',') token = self.advance() elif self.next() is not None and self.next().content == ')': break else: print('UNEXPECTED token in compile_expression_list', token) token = self.advance() self.log_node('/expressionList', indentation) return n_expression
class CompilationEngine:
    """
    Compiles one Jack class, read from a jack tokenizer, into VM commands
    that are emitted through a VMWriter bound to output_file.

    NOTE: ASSUMES ERROR FREE CODE -> a todo could be to add error handling
    """
    # Symbol kind recorded for names declared in each construct.
    SYMBOL_KINDS = {'parameter_list': 'argument', 'var_dec': 'local'}

    # Token texts that open each construct.
    STARTING_TOKENS = {
        'var_dec': ['var'],
        'parameter_list': ['('],
        'subroutine_body': ['{'],
        'expression_list': ['('],
        'expression': ['=', '[', '('],
        'array': ['['],
        'conditional': ['if', 'else']
    }

    # Token texts that close each construct.
    TERMINATING_TOKENS = {
        'class': ['}'],
        'class_var_dec': [';'],
        'subroutine': ['}'],
        'parameter_list': [')'],
        'expression_list': [')'],
        'statements': ['}'],
        'do': [';'],
        'let': [';'],
        'while': ['}'],
        'if': ['}'],
        'var_dec': [';'],
        'return': [';'],
        'expression': [';', ')', ']', ','],
        'array': [']']
    }

    TOKENS_THAT_NEED_LABELS = ['if', 'while']

    def __init__(self, tokenizer, output_file):
        """Store the tokenizer and create symbol tables, VM writer and label counter."""
        self.tokenizer = tokenizer
        self.output_file = output_file
        self.class_symbol_table = SymbolTable()
        self.subroutine_symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)
        self.label_counter = LabelCounter(labels=self.TOKENS_THAT_NEED_LABELS)
        self.class_name = None

    def compile_class(self):
        """
        everything needed to compile a class, the basic unit of compilation
        """
        # skip everything up to class start
        while not self.tokenizer.class_token_reached():
            self.tokenizer.advance()
        # the compilation unit is a class, so keep its name on the instance
        self.class_name = self.tokenizer.next_token.text

        # NOTE(review): has_more_tokens is read as an attribute, not called;
        # confirm it is a property on the tokenizer.
        while self.tokenizer.has_more_tokens:
            self.tokenizer.advance()
            current = self.tokenizer.current_token
            if current.starts_class_var_dec():
                self.compile_class_var_dec()
            elif current.starts_subroutine():
                self.compile_subroutine()

    def compile_class_var_dec(self):
        """
        example: field int x;

        Defines every declared name in the class symbol table.
        """
        symbol_kind = self.tokenizer.keyword()

        # The declared type may be a class name rather than a keyword
        # (``field Square sq;``), so read the raw token text exactly as
        # compile_var_dec does.
        self.tokenizer.advance()
        symbol_type = self.tokenizer.current_token.text

        while self._not_terminal_token_for('class_var_dec'):
            self.tokenizer.advance()
            symbol_name = self.tokenizer.identifier()
            if symbol_name:
                self.class_symbol_table.define(name=symbol_name,
                                               kind=symbol_kind,
                                               symbol_type=symbol_type)

    def compile_subroutine(self):
        """
        example: method void dispose() { ...
        """
        # new subroutine means new subroutine scope
        self.subroutine_symbol_table.reset()

        # skip the subroutine kind and return type to reach the name
        self.tokenizer.advance()
        self.tokenizer.advance()
        subroutine_name = self.tokenizer.current_token.text

        self.tokenizer.advance()
        self.compile_parameter_list()

        self.tokenizer.advance()
        self.compile_subroutine_body(subroutine_name=subroutine_name)

        # label numbering restarts for the next subroutine
        self.label_counter.reset_counts()

    def compile_subroutine_body(self, subroutine_name):
        """Compile local declarations, emit the function command, then the statements."""
        # skip start
        self.tokenizer.advance()

        num_locals = 0
        while self._starting_token_for('var_dec'):
            num_locals += self.compile_var_dec()
            self.tokenizer.advance()

        # the function command needs the local count, so it is written only
        # after all var declarations were seen
        self.vm_writer.write_function(
            name='{}.{}'.format(self.class_name, subroutine_name),
            num_locals=num_locals)

        while self._not_terminal_token_for('subroutine'):
            self.compile_statements()

    def compile_parameter_list(self):
        """
        example: dispose(int a, int b)

        Defines each parameter in the subroutine symbol table; returns nothing.
        """
        while self._not_terminal_token_for('parameter_list'):
            self.tokenizer.advance()
            # when the next token is an identifier, the current one is its type
            if self.tokenizer.next_token.is_identifier():
                self.subroutine_symbol_table.define(
                    name=self.tokenizer.next_token.text,
                    kind=self.SYMBOL_KINDS['parameter_list'],
                    symbol_type=self.tokenizer.current_token.text)

    # 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        """
        example: var int a;

        Returns the number of variables declared, i.e. 2 for ``var int i, sum;``
        """
        # skip var
        self.tokenizer.advance()
        symbol_type = self.tokenizer.current_token.text

        num_vars = 0
        while self._not_terminal_token_for('var_dec'):
            self.tokenizer.advance()
            symbol_name = self.tokenizer.identifier()
            if symbol_name:
                num_vars += 1
                self.subroutine_symbol_table.define(
                    name=symbol_name,
                    kind=self.SYMBOL_KINDS['var_dec'],
                    symbol_type=symbol_type)
        return num_vars

    def compile_statements(self):
        """
        Dispatch each statement token to its compile method until the
        current token is a closing ``}``.
        """
        statement_compile_methods = {
            'if': self.compile_if,
            'do': self.compile_do,
            'let': self.compile_let,
            'while': self.compile_while,
            'return': self.compile_return
        }

        while self._not_terminal_token_for('subroutine'):
            current = self.tokenizer.current_token
            if current.is_statement_token():
                statement_compile_methods[current.text]()
            self.tokenizer.advance()

    def compile_do(self):
        """
        example: do square.dispose();
        """
        # get to caller
        self.tokenizer.advance()
        caller_name = self.tokenizer.current_token.text
        symbol = self._find_symbol_in_symbol_tables(symbol_name=caller_name)

        # skip . and land on the subroutine name
        self.tokenizer.advance()
        self.tokenizer.advance()
        subroutine_name = self.tokenizer.current_token.text

        if symbol:
            # Method call on a known variable: push the object from the
            # segment its symbol was declared with, as compile_let and
            # compile_symbol_push do, instead of assuming a local.
            symbol_type = symbol['type']
            self.vm_writer.write_push(segment=symbol['kind'],
                                      index=symbol['index'])
        else:
            # i.e, OS call
            symbol_type = caller_name

        subroutine_call_name = symbol_type + '.' + subroutine_name

        # start expression list
        self.tokenizer.advance()
        num_args = self.compile_expression_list()
        if symbol:
            # calling object passed as implicit argument
            num_args += 1

        self.vm_writer.write_call(name=subroutine_call_name, num_args=num_args)
        # pop off return of previous call we don't care about
        self.vm_writer.write_pop(segment='temp', index='0')

    # 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        """
        example: let direction = 0;
        """
        # get symbol to store expression evaluation
        self.tokenizer.advance()
        symbol_name = self.tokenizer.current_token.text
        symbol = self._find_symbol_in_symbol_tables(symbol_name=symbol_name)

        array_assignment = self._starting_token_for(keyword_token='array',
                                                    position='next')
        if array_assignment:
            # get to index expression and compile it
            self.tokenizer.advance()
            self.tokenizer.advance()
            self.compile_expression()
            self.vm_writer.write_push(segment=symbol['kind'],
                                      index=symbol['index'])
            # base + index = address of the target slot
            self.vm_writer.write_arithmetic(command='+')

        # go past =
        while not self.tokenizer.current_token.text == '=':
            self.tokenizer.advance()

        while self._not_terminal_token_for('let'):
            self.tokenizer.advance()
            self.compile_expression()

        if not array_assignment:
            # store expression evaluation in symbol location
            self.vm_writer.write_pop(segment=symbol['kind'],
                                     index=symbol['index'])
        else:
            # stash the value, aim THAT at the slot, then store the value
            self.vm_writer.write_pop(segment='temp', index='0')
            self.vm_writer.write_pop(segment='pointer', index='1')
            self.vm_writer.write_push(segment='temp', index='0')
            self.vm_writer.write_pop(segment='that', index='0')

    # 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        """
        example: while (x > 0) { ... }
        """
        # Reserve this loop's label index before compiling the body, so a
        # nested while gets its own index and the goto/end label written
        # after the body still match the labels written before it.
        label_index = self.label_counter.get('while')
        self.label_counter.increment('while')
        exp_label = 'WHILE_EXP{}'.format(label_index)
        end_label = 'WHILE_END{}'.format(label_index)

        self.vm_writer.write_label(label=exp_label)

        # advance to expression start (
        self.tokenizer.advance()
        self.tokenizer.advance()
        self.compile_expression()

        # negate the condition so a single if-goto leaves the loop
        self.vm_writer.write_unary(command='~')
        self.vm_writer.write_ifgoto(label=end_label)

        while self._not_terminal_token_for('while'):
            self.tokenizer.advance()
            if self._statement_token():
                self.compile_statements()

        self.vm_writer.write_goto(label=exp_label)
        self.vm_writer.write_label(label=end_label)

    def compile_if(self):
        """
        example: if (True) { ... } else { ... }
        """
        # Reserve a label index per if statement; sequential and nested ifs
        # in one subroutine therefore never share labels.
        label_index = self.label_counter.get('if')
        self.label_counter.increment('if')
        true_label = 'IF_TRUE{}'.format(label_index)
        false_label = 'IF_FALSE{}'.format(label_index)
        end_label = 'IF_END{}'.format(label_index)

        # advance to expression start
        self.tokenizer.advance()
        self.tokenizer.advance()
        self.compile_expression()

        self.vm_writer.write_ifgoto(label=true_label)
        self.vm_writer.write_goto(label=false_label)
        self.vm_writer.write_label(label=true_label)

        # body of if
        self.compile_conditional_body()

        # Only a following `else` continues this statement; a following
        # `if` is a separate statement handled by compile_statements.
        if self.tokenizer.next_token.text == 'else':
            # past closing }
            self.tokenizer.advance()
            # skip the else code when the if branch ran
            self.vm_writer.write_goto(label=end_label)
            self.vm_writer.write_label(label=false_label)
            self.compile_conditional_body()
            self.vm_writer.write_label(label=end_label)
        else:
            self.vm_writer.write_label(label=false_label)

    def compile_conditional_body(self):
        """Advance to the body's statements and compile them up to the closing ``}``."""
        while self._not_terminal_token_for('if'):
            self.tokenizer.advance()
            if self._statement_token():
                self.compile_statements()

    # term (op term)*
    def compile_expression(self):
        """
        many examples..i,e., x = 4

        Operands are pushed as they are met; operators are collected and
        emitted afterwards in the reverse of the order they were read.
        """
        ops = []

        while self._not_terminal_token_for('expression'):
            current = self.tokenizer.current_token
            if self._subroutine_call():
                self.compile_subroutine_call()
            elif self._array_expression():
                self.compile_array_expression()
            elif current.text.isdigit():
                self.vm_writer.write_push(segment='constant',
                                          index=current.text)
            elif self.tokenizer.identifier():
                self.compile_symbol_push()
            elif current.is_operator() and not self._part_of_expression_list():
                ops.append(Operator(token=current.text, category='bi'))
            elif current.is_unary_operator():
                ops.append(Operator(token=current.text, category='unary'))
            elif self.tokenizer.string_const():
                self.compile_string_const()
            elif self.tokenizer.boolean():
                self.compile_boolean()
            elif self._starting_token_for('expression'):
                # nested expression: skip the opening token and recurse
                self.tokenizer.advance()
                self.compile_expression()
            elif self.tokenizer.null():
                self.vm_writer.write_push(segment='constant', index=0)
            self.tokenizer.advance()

        for op in reversed(ops):
            self.compile_op(op)

    def compile_op(self, op):
        """
        example: +, /, etc.
        """
        if op.unary():
            self.vm_writer.write_unary(command=op.token)
        elif op.multiplication():
            self.vm_writer.write_call(name='Math.multiply', num_args=2)
        elif op.division():
            self.vm_writer.write_call(name='Math.divide', num_args=2)
        else:
            self.vm_writer.write_arithmetic(command=op.token)

    def compile_boolean(self):
        """
        'true' and 'false'
        """
        self.vm_writer.write_push(segment='constant', index=0)
        if self.tokenizer.boolean() == 'true':
            # true is the bitwise negation of 0
            self.vm_writer.write_unary(command='~')

    def compile_string_const(self):
        """
        example: "Hello World"
        """
        text = self.tokenizer.string_const()
        self.vm_writer.write_push(segment='constant', index=len(text))
        self.vm_writer.write_call(name='String.new', num_args=1)

        # build string from chars, leaving out the delimiter characters
        for char in text:
            if char == self.tokenizer.STRING_CONST_DELIMITER:
                continue
            self.vm_writer.write_push(segment='constant', index=ord(char))
            self.vm_writer.write_call(name='String.appendChar', num_args=2)

    def compile_symbol_push(self):
        """
        example: x
        """
        symbol = self._find_symbol_in_symbol_tables(
            symbol_name=self.tokenizer.identifier())
        self.vm_writer.write_push(segment=symbol['kind'],
                                  index=symbol['index'])

    def compile_array_expression(self):
        """
        example: let x = a[j], a[4]
        """
        symbol_name = self.tokenizer.current_token.text
        symbol = self._find_symbol_in_symbol_tables(symbol_name=symbol_name)

        # get to index expression and compile it
        self.tokenizer.advance()
        self.tokenizer.advance()
        self.compile_expression()

        # Push the array base from the segment the symbol was declared with
        # (same as the array branch of compile_let) rather than assuming
        # every array variable is a local.
        self.vm_writer.write_push(segment=symbol['kind'],
                                  index=symbol['index'])
        # add two addresses: identifier and expression result
        self.vm_writer.write_arithmetic(command='+')
        # pop address onto pointer 1 / THAT
        self.vm_writer.write_pop(segment='pointer', index=1)
        # push value onto stack
        self.vm_writer.write_push(segment='that', index=0)

    def compile_subroutine_call(self):
        """
        example: Memory.peek(8000)
        """
        # every token before the opening ( is part of the call name
        name_parts = []
        while not self._starting_token_for('expression_list'):
            name_parts.append(self.tokenizer.current_token.text)
            self.tokenizer.advance()
        subroutine_name = ''.join(name_parts)

        num_args = self.compile_expression_list()
        # write_call after pushing arguments onto stack
        self.vm_writer.write_call(name=subroutine_name, num_args=num_args)

    # (expression (',' expression)* )?
    def compile_expression_list(self):
        """
        separated out of compile_expression because of edge cases from normal expression
        example: (x, y, x + 5)

        Returns the number of expressions compiled (0 for an empty list).
        """
        num_args = 0
        if self._empty_expression_list():
            return num_args

        # start expressions
        self.tokenizer.advance()
        while self._not_terminal_token_for('expression_list'):
            num_args += 1
            self.compile_expression()
            # a , after compile_expression means another expression follows
            if self._another_expression_coming():
                self.tokenizer.advance()
        return num_args

    def compile_return(self):
        """
        example: return x; or return;
        """
        if self._not_terminal_token_for(keyword_token='return',
                                        position='next'):
            self.compile_expression()
        else:
            # push constant for void
            self.vm_writer.write_push(segment='constant', index='0')
            self.tokenizer.advance()
        self.vm_writer.write_return()

    def _not_terminal_token_for(self, keyword_token, position='current'):
        """True when the current/next token does not terminate ``keyword_token``."""
        terminators = self.TERMINATING_TOKENS[keyword_token]
        if position == 'current':
            return self.tokenizer.current_token.text not in terminators
        elif position == 'next':
            return self.tokenizer.next_token.text not in terminators

    def _starting_token_for(self, keyword_token, position='current'):
        """True when the current/next token opens ``keyword_token``."""
        starters = self.STARTING_TOKENS[keyword_token]
        if position == 'current':
            return self.tokenizer.current_token.text in starters
        elif position == 'next':
            return self.tokenizer.next_token.text in starters

    def _statement_token(self):
        return self.tokenizer.current_token.is_statement_token()

    def _another_expression_coming(self):
        return self.tokenizer.current_token.is_expression_list_delimiter()

    def _find_symbol_in_symbol_tables(self, symbol_name):
        """Look the name up in the subroutine table first, then the class table.

        Returns the first truthy lookup result, or None when neither table
        has one.
        """
        for table in (self.subroutine_symbol_table, self.class_symbol_table):
            symbol = table.find_symbol_by_name(symbol_name)
            if symbol:
                return symbol
        return None

    def _empty_expression_list(self):
        return (self._start_of_expression_list()
                and self._next_ends_expression_list())

    def _start_of_expression_list(self):
        return (self.tokenizer.current_token.text
                in self.STARTING_TOKENS['expression_list'])

    def _next_ends_expression_list(self):
        return (self.tokenizer.next_token.text
                in self.TERMINATING_TOKENS['expression_list'])

    def _subroutine_call(self):
        return (self.tokenizer.identifier()
                and self.tokenizer.next_token.is_subroutine_call_delimiter())

    def _array_expression(self):
        return self.tokenizer.identifier() and self._starting_token_for(
            keyword_token='array', position='next')

    def _part_of_expression_list(self):
        return self.tokenizer.part_of_expression_list()
class CompilationEngine:
    """Recursive-descent Jack compiler that emits VM code while parsing.

    Constructing an instance compiles the whole class read from
    ``inputFile`` and writes the VM commands through a ``VMWriter`` bound to
    ``outputFile``.

    The tokenizer reports token types as small integers; the way they are
    paired with accessor calls in this class gives their meaning:
    0 keyword, 1 symbol, 2 identifier, 3 integer constant, 4 string constant.
    """

    _KEYWORD, _SYMBOL, _IDENTIFIER, _INT_CONST, _STRING_CONST = range(5)

    # Symbol-table kind -> VM memory segment.
    _SEGMENT_OF_KIND = {
        "field": "this",
        "var": "local",
        "arg": "argument",
        "static": "static",
    }

    # Binary operators that map to a single VM arithmetic command.
    _ARITHMETIC_OF_OP = {
        "=": "eq",
        "+": "add",
        "-": "sub",
        "&": "and",
        "|": "or",
        "<": "lt",
        ">": "gt",
    }

    # Binary operators implemented by a two-argument OS routine.
    _OS_CALL_OF_OP = {
        "*": "Math.multiply",
        "/": "Math.divide",
    }

    def __init__(self, inputFile, outputFile):
        """Create the tokenizer/writer/symbol table and compile the class."""
        self.tokenizer = JackTokenizer(inputFile)
        self.vmWriter = VMWriter(outputFile)
        self.symbolTable = SymbolTable()
        self.classname = ""
        # Label counters must exist before compilation starts; they are
        # reset again at the top of every subroutine.
        self.whilecounter = 0
        self.ifcounter = 0
        self.CompileClass()

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------
    def _pushVariable(self, varname, varkind):
        """Push ``varname`` from the segment matching ``varkind``.

        Nothing is emitted when ``varkind`` is not a known kind.
        """
        segment = self._SEGMENT_OF_KIND.get(varkind)
        if segment is not None:
            self.vmWriter.writePush(segment, self.symbolTable.indexOf(varname))

    def _popVariable(self, varname, varkind):
        """Pop the stack top into ``varname`` (no-op for an unknown kind)."""
        segment = self._SEGMENT_OF_KIND.get(varkind)
        if segment is not None:
            self.vmWriter.writePop(segment, self.symbolTable.indexOf(varname))

    # ------------------------------------------------------------------
    # program structure
    # ------------------------------------------------------------------
    def CompileClass(self):
        """Compile ``class Name { classVarDec* subroutineDec* }``."""
        self.tokenizer.advance()
        self.classname = self.tokenizer.identifier()
        self.tokenizer.advance()
        # ignore {
        self.tokenizer.advance()
        while self.tokenizer.keyWord() in ("static", "field"):
            self.CompileClassVarDec()
        while self.tokenizer.keyWord() in ("constructor", "function", "method"):
            self.CompileSubroutine()
        # ignore }
        self.tokenizer.advance()

    def CompileClassVarDec(self):
        """Record ``(static|field) type name (, name)* ;`` in the symbol table."""
        kind = self.tokenizer.keyWord()
        self.tokenizer.advance()
        vartype = self.compileType()
        name = self.tokenizer.identifier()
        self.symbolTable.define(name, vartype, kind)
        self.tokenizer.advance()
        # the remaining names of the same declaration, if any
        while self.tokenizer.symbol() == ",":
            self.tokenizer.advance()
            name = self.tokenizer.identifier()
            self.symbolTable.define(name, vartype, kind)
            self.tokenizer.advance()
        # ignore ;
        self.tokenizer.advance()

    def CompileSubroutine(self):
        """Compile one constructor, function or method declaration."""
        self.symbolTable.startSubroutine()
        self.ifcounter = 0
        self.whilecounter = 0
        # constructor | function | method
        functype = self.tokenizer.keyWord()
        self.tokenizer.advance()
        if functype == "method":
            # the receiver is registered first so it becomes the first "arg"
            self.symbolTable.define("this", self.classname, "arg")
        # skip the return type
        self.tokenizer.advance()
        subroutineName = self.classname + "." + self.tokenizer.identifier()
        self.tokenizer.advance()
        # ( parameterList )
        self.tokenizer.advance()
        self.compileParameterList()
        self.tokenizer.advance()
        # subroutine body: ignore {
        self.tokenizer.advance()
        # varDec*
        while self.tokenizer.keyWord() == "var":
            self.compileVarDec()
        self.vmWriter.writeFunction(subroutineName,
                                    self.symbolTable.varCount("var"))
        if functype in ("constructor", "method"):
            if functype == "constructor":
                # allocate one word per field; Memory.alloc leaves the address
                self.vmWriter.writePush("constant",
                                        self.symbolTable.varCount("field"))
                self.vmWriter.writeCall("Memory.alloc", "1")
            else:
                self.vmWriter.writePush("argument", "0")
            # anchor `this` on the object address now on the stack
            self.vmWriter.writePop("pointer", "0")
        # statements
        self.compileStatements()
        # ignore }
        self.tokenizer.advance()

    def compileParameterList(self):
        """Record a (possibly empty) parameter list as "arg" symbols."""
        # a symbol here means the list is empty (the closing parenthesis)
        if self.tokenizer.tokenType() != self._SYMBOL:
            argtype = self.compileType()
            argname = self.tokenizer.identifier()
            self.symbolTable.define(argname, argtype, "arg")
            self.tokenizer.advance()
            # (, type varName)*
            while self.tokenizer.symbol() == ",":
                self.tokenizer.advance()
                argtype = self.compileType()
                argname = self.tokenizer.identifier()
                self.symbolTable.define(argname, argtype, "arg")
                self.tokenizer.advance()

    def compileVarDec(self):
        """Record ``var type name (, name)* ;`` as "var" symbols."""
        # var
        self.tokenizer.advance()
        vartype = self.compileType()
        varname = self.tokenizer.identifier()
        self.symbolTable.define(varname, vartype, "var")
        self.tokenizer.advance()
        # (, varName)*
        while self.tokenizer.symbol() == ",":
            self.tokenizer.advance()
            varname = self.tokenizer.identifier()
            self.symbolTable.define(varname, vartype, "var")
            self.tokenizer.advance()
        # ignore ;
        self.tokenizer.advance()

    # ------------------------------------------------------------------
    # statements
    # ------------------------------------------------------------------
    def compileStatements(self):
        """Compile statements until the current token stops starting one."""
        handlers = {
            "let": self.compileLet,
            "if": self.compileIf,
            "while": self.compileWhile,
            "do": self.compileDo,
            "return": self.compileReturn,
        }
        while self.tokenizer.tokenType() == self._KEYWORD:
            handler = handlers.get(self.tokenizer.keyWord())
            if handler is None:
                # A keyword that does not start a statement consumes no
                # token, so looping on it would never terminate.
                break
            handler()

    def compileDo(self):
        """Compile ``do subroutineCall ;`` and discard the returned value."""
        self.tokenizer.advance()
        self.compileSubRoutineCall()
        self.vmWriter.writePop("temp", "0")
        # ignore ;
        self.tokenizer.advance()

    def compileLet(self):
        """Compile ``let name = expr ;`` or ``let name[expr] = expr ;``."""
        # let
        self.tokenizer.advance()
        varname = self.tokenizer.identifier()
        varkind = self.symbolTable.kindOf(varname)
        self.tokenizer.advance()
        if self.tokenizer.symbol() == "[":
            # target address = index expression + base address
            self.tokenizer.advance()
            self.CompileExpression()
            self._pushVariable(varname, varkind)
            self.vmWriter.writeArithmetic("add")
            # ignore ]
            self.tokenizer.advance()
            # ignore =
            self.tokenizer.advance()
            self.CompileExpression()
            # Park the value in temp 0 so the address underneath it can be
            # popped into pointer 1 (THAT) before the value is stored.
            self.vmWriter.writePop("temp", "0")
            self.vmWriter.writePop("pointer", "1")
            self.vmWriter.writePush("temp", "0")
            self.vmWriter.writePop("that", "0")
            # ignore ;
            self.tokenizer.advance()
        else:
            # ignore =
            self.tokenizer.advance()
            self.CompileExpression()
            self._popVariable(varname, varkind)
            # ignore ;
            self.tokenizer.advance()

    def compileWhile(self):
        """Compile ``while ( expr ) { statements }``."""
        # while
        self.tokenizer.advance()
        # (
        self.tokenizer.advance()
        # reserve the label index before the body so nested loops get their own
        whileindex = self.whilecounter
        self.whilecounter += 1
        self.vmWriter.writeLabel("WHILE_EXP" + str(whileindex))
        self.CompileExpression()
        self.vmWriter.writeArithmetic("not")
        self.vmWriter.writeIf("WHILE_END" + str(whileindex))
        # )
        self.tokenizer.advance()
        # ignore {
        self.tokenizer.advance()
        self.compileStatements()
        # ignore }
        self.tokenizer.advance()
        self.vmWriter.writeGoto("WHILE_EXP" + str(whileindex))
        self.vmWriter.writeLabel("WHILE_END" + str(whileindex))

    def compileReturn(self):
        """Compile ``return expr? ;``; a bare return pushes constant 0."""
        # return
        self.tokenizer.advance()
        if self.isTerm():
            self.CompileExpression()
        else:
            self.vmWriter.writePush("constant", "0")
        self.vmWriter.writeReturn()
        # ignore ;
        self.tokenizer.advance()

    def compileIf(self):
        """Compile ``if ( expr ) { statements } (else { statements })?``."""
        # if
        self.tokenizer.advance()
        # (
        self.tokenizer.advance()
        self.CompileExpression()
        ifindex = self.ifcounter
        self.ifcounter += 1
        suffix = str(ifindex)
        self.vmWriter.writeIf("IF_TRUE" + suffix)
        self.vmWriter.writeGoto("IF_FALSE" + suffix)
        self.vmWriter.writeLabel("IF_TRUE" + suffix)
        # )
        self.tokenizer.advance()
        # { statements }
        self.tokenizer.advance()
        self.compileStatements()
        self.tokenizer.advance()
        if (self.tokenizer.tokenType() == self._KEYWORD
                and self.tokenizer.keyWord() == "else"):
            self.vmWriter.writeGoto("IF_END" + suffix)
            self.vmWriter.writeLabel("IF_FALSE" + suffix)
            # else
            self.tokenizer.advance()
            # { statements }
            self.tokenizer.advance()
            self.compileStatements()
            self.tokenizer.advance()
            self.vmWriter.writeLabel("IF_END" + suffix)
        else:
            self.vmWriter.writeLabel("IF_FALSE" + suffix)

    # ------------------------------------------------------------------
    # expressions
    # ------------------------------------------------------------------
    def CompileExpression(self):
        """Compile ``term (op term)*``, applying operators left to right."""
        self.CompileTerm()
        op = self.tokenizer.symbol()
        while self.tokenizer.tokenType() == self._SYMBOL and op in operators:
            self.tokenizer.advance()
            self.CompileTerm()
            arithmetic = self._ARITHMETIC_OF_OP.get(op)
            if arithmetic is not None:
                self.vmWriter.writeArithmetic(arithmetic)
            elif op in self._OS_CALL_OF_OP:
                self.vmWriter.writeCall(self._OS_CALL_OF_OP[op], "2")
            op = self.tokenizer.symbol()

    def CompileTerm(self):
        """Compile a single term and leave its value on the stack."""
        tokentype = self.tokenizer.tokenType()
        if tokentype == self._INT_CONST:
            self.vmWriter.writePush("constant", self.tokenizer.intVal())
            self.tokenizer.advance()
        elif tokentype == self._STRING_CONST:
            conststring = self.tokenizer.stringVal()
            self.vmWriter.writePush("constant", str(len(conststring)))
            self.vmWriter.writeCall("String.new", "1")
            for char in conststring:
                self.vmWriter.writePush("constant", str(ord(char)))
                self.vmWriter.writeCall("String.appendChar", "2")
            self.tokenizer.advance()
        elif tokentype == self._KEYWORD:
            keywordconst = self.tokenizer.keyWord()
            if keywordconst == "true":
                # true is emitted as the bitwise not of 0
                self.vmWriter.writePush("constant", "0")
                self.vmWriter.writeArithmetic("not")
            elif keywordconst in ("false", "null"):
                self.vmWriter.writePush("constant", "0")
            elif keywordconst == "this":
                self.vmWriter.writePush("pointer", "0")
            self.tokenizer.advance()
        elif tokentype == self._IDENTIFIER:
            # one token of look-ahead tells array access, call and plain
            # variable apart
            lookahead = self.tokenizer.tokens[self.tokenizer.currentToken + 1]
            if lookahead == '[':
                varname = self.tokenizer.identifier()
                varkind = self.symbolTable.kindOf(varname)
                self.tokenizer.advance()
                # [ expression ]
                self.tokenizer.advance()
                self.CompileExpression()
                self._pushVariable(varname, varkind)
                self.vmWriter.writeArithmetic("add")
                # read the element through THAT
                self.vmWriter.writePop("pointer", "1")
                self.vmWriter.writePush("that", "0")
                self.tokenizer.advance()
            elif lookahead == '(' or lookahead == '.':
                self.compileSubRoutineCall()
            else:
                varname = self.tokenizer.identifier()
                varkind = self.symbolTable.kindOf(varname)
                self._pushVariable(varname, varkind)
                self.tokenizer.advance()
        elif tokentype == self._SYMBOL and self.tokenizer.symbol() == '(':
            # ( expression )
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()
        else:
            # unary operator followed by a term
            op = self.tokenizer.symbol()
            self.tokenizer.advance()
            self.CompileTerm()
            if op == "-":
                self.vmWriter.writeArithmetic("neg")
            elif op == "~":
                self.vmWriter.writeArithmetic("not")

    def compileSubRoutineCall(self):
        """Compile ``name(args)``, ``Class.name(args)`` or ``var.name(args)``.

        Calls that have a receiver (a bare ``name(args)`` or a call through a
        declared variable) push the receiver first and count it as one extra
        argument.
        """
        identifier = self.tokenizer.identifier()
        self.tokenizer.advance()
        if self.tokenizer.symbol() == '(':
            # bare name: a method of the current object
            self.tokenizer.advance()
            self.vmWriter.writePush("pointer", "0")
            argnum = self.CompileExpressionList()
            self.vmWriter.writeCall(self.classname + "." + identifier,
                                    str(argnum + 1))
            self.tokenizer.advance()
            return

        # skip '.', read the subroutine name, then skip the name and '('
        self.tokenizer.advance()
        subname = self.tokenizer.identifier()
        self.tokenizer.advance()
        self.tokenizer.advance()

        # Subroutine-level symbols are looked up before class-level ones.
        if identifier in self.symbolTable.subroutinetable:
            if self.symbolTable.kindOf(identifier) == "var":
                segment = "local"
            else:
                segment = "argument"
        elif identifier in self.symbolTable.classtable:
            if self.symbolTable.kindOf(identifier) == "static":
                segment = "static"
            else:
                segment = "this"
        else:
            segment = None

        if segment is not None:
            # var.method(...): the variable is the receiver
            self.vmWriter.writePush(segment,
                                    self.symbolTable.indexOf(identifier))
            argnum = self.CompileExpressionList()
            identifierclass = self.symbolTable.typeOf(identifier)
            self.vmWriter.writeCall(identifierclass + "." + subname,
                                    str(argnum + 1))
        else:
            # Class.function(...): no receiver
            argnum = self.CompileExpressionList()
            self.vmWriter.writeCall(identifier + "." + subname, str(argnum))
        self.tokenizer.advance()

    def CompileExpressionList(self):
        """Compile a comma-separated expression list; return its length."""
        count = 0
        if self.isTerm():
            count += 1
            self.CompileExpression()
            while self.tokenizer.symbol() == ',':
                count += 1
                self.tokenizer.advance()
                self.CompileExpression()
        return count

    def isTerm(self):
        """Return True when the current token can start a term."""
        tokentype = self.tokenizer.tokenType()
        if tokentype in (self._INT_CONST, self._STRING_CONST,
                         self._IDENTIFIER):
            return True
        if tokentype == self._KEYWORD:
            return self.tokenizer.keyWord() in keyword_const
        if tokentype == self._SYMBOL:
            return self.tokenizer.symbol() in ('(', '-', '~')
        return False

    def compileType(self):
        """Return the current type token's text and advance past it."""
        if self.tokenizer.tokenType() == self._KEYWORD:
            typen = self.tokenizer.keyWord()
        else:
            typen = self.tokenizer.identifier()
        self.tokenizer.advance()
        return typen
from SymbolTable import SymbolTable

# Smoke test for SymbolTable: prints a message for every failed check and
# nothing but the banner lines when all checks pass.
# print() is called with a single string so the script produces the same
# output on Python 2 and Python 3.
tab = SymbolTable()
tab.addEntry("testSymbol", 32767)
print("=====Testing SymbolTable")
if not tab.contains("testSymbol"):
    print("SymbolTable.contains should return true")
# the expected address is the 15-character binary string for 32767
if tab.getAddress("testSymbol") != "111111111111111":
    print("SymbolTable.getAddress did not return correct value")
tab.addVariable("testVar")
# the first variable added is expected at address 16
if tab.getAddress("testVar") != "000000000010000":
    print("SymbolTable.getAddress did not return correct value for var")
print("Done testing SymbolTable=====")
class STManager():
    """Manage a stack of symbol tables rooted at one global table.

    The last table on the stack is the "current active" table; most
    operations simply delegate to it.
    """

    def __init__(self):
        """Create the root table and start the stack with it."""
        # create the root symbol table
        self.root = ST()
        # initialize the stack of symbol tables for activation records
        self.activeSTs = [self.root]
        # table of functions, keyed by the line number they are defined at
        self.ftable = {}

    def makeTable(self, _prev):
        """Create a new symbol table whose parent is ``_prev``.

        The new table is returned, not pushed onto the stack.

        @params _prev {SymbolTable} -- parent symbol table
        @return {SymbolTable} -- newly created symbol table
        """
        newST = ST()
        newST.parent = _prev
        return newST

    def lookup(self, _symbol):
        """Search for ``_symbol`` starting from the current active table.

        @params _symbol {string} -- symbol to look for
        @return -- whatever the table's ``search`` returns
        """
        return self.currActive.search(_symbol)

    def lookupInRoot(self, _symbol):
        """Search for ``_symbol`` in the root table.

        Used to look up globals and functions.

        @params _symbol {string} -- symbol to look for
        @return -- whatever the root table's ``search`` returns
        """
        return self.root.search(_symbol)

    def push(self, _st):
        """Push ``_st`` onto the stack, making it the current active table.

        @params _st {SymbolTable} -- symbol table to push
        """
        self.activeSTs.append(_st)

    def pop(self):
        """Remove and return the table on top of the stack.

        @return {SymbolTable} -- removed symbol table
        """
        return self.activeSTs.pop()

    def insert(self, _name, _type, _width, _scope=None):
        """Create a new entry in the current active table.

        @params _name {string} -- name (key | id) of the new entry
        @params _type {string} -- type of the name (an attribute)
        @params _width {integer} -- size | offset for the name
        @params _scope -- forwarded unchanged to the table's ``insert``
        @return -- whatever the table's ``insert`` returns
        """
        return self.currActive.insert(_name, _type, _width, _scope)

    def linkGlobalSym(self, _name, _attrs):
        """Add a link to a global variable into the current active table."""
        self.currActive.linkGlobalSym(_name, _attrs)

    def enterProc(self, _name, _lineNumber, _numParams, _procST):
        """Register a procedure.

        The procedure is recorded in ``ftable`` under ``_lineNumber`` and
        then entered into the current active table.

        @params _name {string} -- name of the procedure
        @params _lineNumber {int} -- line number where the function is defined
        @params _numParams {int} -- number of parameters
        @params _procST {SymbolTable} -- symbol table for the procedure
        @return -- whatever the table's ``enterProc`` returns
        """
        self.ftable[_lineNumber] = {
            "place": _name,
            "numParams": _numParams,
            "st": _procST
        }
        return self.currActive.enterProc(_name, _lineNumber, _numParams,
                                         _procST)

    def setAttr(self, _symbol, _key, _val):
        """Set an attribute of a symbol via the current active table.

        @params _symbol {string} -- symbol whose attribute is set
        @params _key {string} -- key for the attribute
        @params _val {object} -- value for the attribute
        """
        self.currActive.setAttr(_symbol, _key, _val)

    def getAttr(self, _symbol, _key):
        """Get one attribute of a symbol via the current active table.

        @params _symbol {string} -- symbol whose attribute is fetched
        @params _key {string} -- attribute key
        @return {object} -- whatever the table's ``getAttr`` returns
        """
        return self.currActive.getAttr(_symbol, _key)

    def getAttrs(self, _symbol):
        """Get all attributes of a symbol via the current active table.

        @params _symbol {string} -- symbol whose attributes are fetched
        @return {object} -- whatever the table's ``getAttrs`` returns
        """
        return self.currActive.getAttrs(_symbol)

    @property
    def currActive(self):
        """The current active symbol table, i.e. the top of the stack."""
        return self.activeSTs[-1]

    def insertKeywords(self):
        """Placeholder for inserting keywords into the root table.

        Currently does nothing and returns None.
        """
        return None
class Compiler:
    """Compile one tokenized Jack class into VM code.

    The generated code is accumulated as text in ``self.vmcode``;
    ``writeVMcode`` runs the compilation and saves it to a file.

    Each entry of ``self.tokens`` is read as a mapping with a ``"token"``
    (text) and a ``"Type"`` key; ``self.count`` is the index of the current
    token.
    """

    def __init__(self, path):
        """Tokenize the source at ``path`` and set up empty scopes."""
        with open(path) as f:
            text = f.read()
        self.tokens = tokenize(text)
        self.count = 0
        self.ClassScope = SymbolTable()
        self.SubroutineScope = SymbolTable()
        # next free label index per construct, reset after every subroutine
        self.label_count = {"if": 0, "while": 0}
        self.vmcode = ""

    def writeVMcode(self, path):
        """Compile the class and write the generated VM code to ``path``."""
        self.compileClass()
        with open(path, "w") as f:
            f.write(self.vmcode)

    def takeWord(self):
        """Return the current token's text and move to the next token."""
        token = self.tokens[self.count]["token"]
        self.count += 1
        return token

    def advance(self):
        """Skip the current token."""
        self.count += 1

    def _nextLabelIndex(self, construct):
        """Reserve and return the next label index for ``construct``.

        The index is reserved before the construct's body is compiled, so
        nested and consecutive constructs never share a label.
        """
        index = self.label_count[construct]
        self.label_count[construct] = index + 1
        return str(index)

    def compileClass(self):
        """Compile ``class Name { classVarDec* subroutineDec* }``."""
        self.advance()  # class
        self.className = self.takeWord()  # className
        self.advance()  # {
        while self.tokens[self.count]["token"] in ["static", "field"]:
            self.compileClassVarDec()
        while self.tokens[self.count]["token"] in [
                "constructor", "method", "function"
        ]:
            self.compileSubroutine()
        self.advance()  # }

    def compileClassVarDec(self):
        """Record ``(static|field) type name (, name)* ;`` in the class scope."""
        symbolKind = self.takeWord()  # (static|field)
        symbolType = self.takeWord()  # type
        symbolName = self.takeWord()  # varName
        self.ClassScope.define(symbolName, symbolType, symbolKind)
        while not self.tokens[self.count]["token"] == ";":
            self.advance()  # ,
            symbolName = self.takeWord()  # varName
            self.ClassScope.define(symbolName, symbolType, symbolKind)
        self.advance()  # ;

    def compileSubroutine(self):
        """Compile one constructor, method or function declaration."""
        # NOTE(review): nothing here binds `this` for subroutines declared
        # `method`; only constructors set pointer 0. Confirm whether methods
        # are meant to be supported.
        self.SubroutineScope.reset()
        ftype = self.takeWord()  # (constructor|method|function)
        self.advance()  # return type
        subroutineName = self.takeWord()  # subroutineName
        self.advance()  # (
        self.complieParameterList()
        self.advance()  # )
        self.advance()  # {
        num_locals = 0
        while self.tokens[self.count]["token"] in ["var"]:
            num_locals += self.compileVarDec()
        self.vmcode += VMWriter.writeFunction(
            self.className + "." + subroutineName, num_locals)
        # a constructor allocates memory for the object it produces
        if ftype == "constructor":
            num_field = self.ClassScope.varCount("field")
            self.vmcode += VMWriter.writePush("constant", num_field)
            self.vmcode += VMWriter.writeCall("Memory.alloc", 1)
            # link the new object to THIS
            self.vmcode += VMWriter.writePop("pointer", 0)
            # change all FIELD in ClassScope to THIS
            self.ClassScope.field2This()
        self.compileStatements()
        self.advance()  # }
        self.label_count["if"] = 0
        self.label_count["while"] = 0

    def complieParameterList(self):
        """Record the parameters (if any) as "argument" symbols."""
        if self.tokens[self.count]["token"] == ")":
            return  # no parameters
        symbolKind = "argument"
        symbolType = self.takeWord()  # type
        symbolName = self.takeWord()  # varName
        self.SubroutineScope.define(symbolName, symbolType, symbolKind)
        while self.tokens[self.count]["token"] == ",":
            self.advance()  # ,
            symbolType = self.takeWord()  # type
            symbolName = self.takeWord()  # varName
            self.SubroutineScope.define(symbolName, symbolType, symbolKind)

    def compileVarDec(self):
        """Record ``var type name (, name)* ;``; return how many names."""
        symbolKind = "local"
        self.advance()  # var
        symbolType = self.takeWord()  # type
        symbolName = self.takeWord()  # varName
        self.SubroutineScope.define(symbolName, symbolType, symbolKind)
        num_locals = 1
        while self.tokens[self.count]["token"] == ",":
            self.advance()  # ,
            symbolName = self.takeWord()  # varName
            self.SubroutineScope.define(symbolName, symbolType, symbolKind)
            num_locals += 1
        self.advance()  # ;
        return num_locals

    def compileStatements(self):
        """Compile statements until the current token stops starting one."""
        handlers = {
            "do": self.compileDo,
            "let": self.compileLet,
            "while": self.compileWhile,
            "return": self.compileReturn,
            "if": self.compileIf,
        }
        # an empty body (current token "}") simply never enters the loop
        while self.tokens[self.count]["token"] in handlers:
            handlers[self.tokens[self.count]["token"]]()

    def compileDo(self):
        """Compile ``do subroutineCall ;`` and discard the returned value."""
        self.advance()  # do
        self.compileSubroutineCall()
        # the returned value is not stored anywhere, so pop it
        self.vmcode += VMWriter.writePop('temp', '0')
        self.advance()  # ;

    def compileLet(self):
        """Compile ``let name = expr ;`` or ``let name[expr] = expr ;``."""
        self.advance()  # let
        symbolName = self.takeWord()  # varName
        symbol = self.findSymbol(symbolName)
        isArray = self.tokens[self.count]["token"] == "["
        if isArray:
            self.advance()  # [
            # index expression, then base address, then their sum
            self.compileExpression()
            self.vmcode += VMWriter.writePush(symbol['kind'], symbol['index'])
            self.vmcode += VMWriter.writeArithmetic('+')
            self.advance()  # ]
        self.advance()  # =
        self.compileExpression()
        if not isArray:
            self.vmcode += VMWriter.writePop(symbol['kind'], symbol['index'])
        else:
            # Park the value in temp 0 so the element address underneath it
            # can be popped into THAT before the value is stored.
            self.vmcode += VMWriter.writePop('temp', '0')
            self.vmcode += VMWriter.writePop('pointer', '1')
            self.vmcode += VMWriter.writePush('temp', '0')
            self.vmcode += VMWriter.writePop('that', '0')
        self.advance()  # ;

    def compileWhile(self):
        """Compile ``while ( expr ) { statements }``."""
        index = self._nextLabelIndex("while")
        self.vmcode += VMWriter.writeLabel("WHILE_START_" + index)
        self.advance()  # while
        self.advance()  # (
        self.compileExpression()
        self.advance()  # )
        # leave the loop when the condition does not hold
        self.vmcode += VMWriter.writeArithmetic("~")
        self.vmcode += VMWriter.writeIfGoto("WHILE_END_" + index)
        self.advance()  # {
        self.compileStatements()
        self.vmcode += VMWriter.writeGoto("WHILE_START_" + index)
        self.advance()  # }
        self.vmcode += VMWriter.writeLabel("WHILE_END_" + index)

    def compileReturn(self):
        """Compile ``return expr? ;``; a bare return pushes constant 0."""
        self.advance()  # return
        if not self.tokens[self.count]["token"] == ";":
            self.compileExpression()
        else:
            # push constant 0 for a void return
            self.vmcode += VMWriter.writePush("constant", "0")
        self.advance()  # ;
        self.vmcode += VMWriter.writeReturn()

    def compileIf(self):
        """Compile ``if ( expr ) { statements } (else { statements })?``."""
        index = self._nextLabelIndex("if")
        self.advance()  # if
        self.advance()  # (
        self.compileExpression()
        self.advance()  # )
        self.vmcode += VMWriter.writeIfGoto("IF_TRUE_" + index)
        self.vmcode += VMWriter.writeGoto("IF_FALSE_" + index)
        self.vmcode += VMWriter.writeLabel("IF_TRUE_" + index)
        self.advance()  # {
        self.compileCondStatements()
        self.advance()  # }
        if self.tokens[self.count]["token"] == "else":
            self.advance()  # else
            self.advance()  # {
            # the if-part, once executed, skips the else-part
            self.vmcode += VMWriter.writeGoto("IF_END_" + index)
            self.vmcode += VMWriter.writeLabel("IF_FALSE_" + index)
            self.compileCondStatements()
            self.vmcode += VMWriter.writeLabel("IF_END_" + index)
            self.advance()  # }
        else:
            self.vmcode += VMWriter.writeLabel("IF_FALSE_" + index)

    def compileCondStatements(self):
        """Compile the body of an if/else branch.

        Kept as a separate entry point for existing callers; label indexes
        are reserved by ``compileIf`` itself, so this only delegates.
        """
        self.compileStatements()

    def compileExpression(self):
        """Compile ``term (op term)*``, applying operators front to back."""
        self.compileTerm()
        while self.tokens[self.count]["token"] in [
                "+", "-", "*", "/", "&", "|", "<", ">", "="
        ]:
            op = self.takeWord()  # op
            self.compileTerm()
            if op == "*":
                self.vmcode += VMWriter.writeCall("Math.multiply", 2)
            elif op == "/":
                self.vmcode += VMWriter.writeCall("Math.divide", 2)
            else:
                self.vmcode += VMWriter.writeArithmetic(op)

    def compileTerm(self):
        """Compile a single term and leave its value on the stack."""
        current = self.tokens[self.count]
        if current["token"] in ["-", "~"]:
            # unary operator followed by a term
            # NOTE(review): "-" is passed to writeArithmetic both here and
            # for binary minus; confirm the writer can tell neg from sub.
            op = self.takeWord()
            self.compileTerm()
            self.vmcode += VMWriter.writeArithmetic(op)
        elif current["token"] == "(":
            self.advance()  # (
            self.compileExpression()
            self.advance()  # )
        elif self.tokens[self.count + 1]["token"] in ["(", "."]:
            self.compileSubroutineCall()
        elif self.tokens[self.count + 1]["token"] == "[":
            self.compileArrayEXP()
        elif current["Type"] == "INT_CONST":
            self.vmcode += VMWriter.writePush("constant", self.takeWord())
        elif current["Type"] == "STRING_CONST":
            self.compileString(self.takeWord())
        elif current["Type"] == "KEYWORDS":
            word = self.takeWord()
            if word == "null":
                self.vmcode += VMWriter.writePush("constant", 0)
            elif word == "true":
                # true is emitted as the bitwise not of 0
                self.vmcode += VMWriter.writePush("constant", 0)
                self.vmcode += VMWriter.writeArithmetic("~")
            elif word == "false":
                self.vmcode += VMWriter.writePush("constant", 0)
            elif word == "this":
                self.vmcode += VMWriter.writePush("pointer", 0)
        elif current["Type"] == "IDENTIFIER":
            symbol = self.findSymbol(self.takeWord())
            self.vmcode += VMWriter.writePush(symbol['kind'], symbol['index'])

    def compileString(self, s):
        """Emit code that builds the string ``s`` with the OS String class.

        Double-quote characters in ``s`` are not appended; the length passed
        to ``String.new`` is ``len(s)``.
        """
        str_len = len(s)
        self.vmcode += VMWriter.writePush("constant", str_len)
        self.vmcode += VMWriter.writeCall("String.new", 1)
        for c in s:
            if not c == "\"":
                self.vmcode += VMWriter.writePush("constant", ord(c))
                self.vmcode += VMWriter.writeCall("String.appendChar", 2)

    def compileSubroutineCall(self):
        """Compile ``name(args)``, ``Class.name(args)`` or ``var.name(args)``.

        Consumes the tokens from the first name through the closing
        parenthesis. When the part before the dot is a declared variable, the
        variable is pushed as the receiver, its declared type names the
        class, and it counts as one extra argument. A bare ``name`` is
        qualified with the current class name.
        """
        # NOTE(review): a bare call pushes no receiver, matching the previous
        # behaviour of `do name(...)`; revisit together with method support.
        caller = self.takeWord()  # subroutineName | className | varName
        implicit_args = 0
        if self.tokens[self.count]["token"] == ".":
            self.advance()  # .
            func = self.takeWord()  # subroutineName
            symbol = self.findSymbol(caller)
            if symbol:
                self.vmcode += VMWriter.writePush(symbol['kind'],
                                                  symbol['index'])
                owner = symbol['type']
                implicit_args = 1
            else:
                owner = caller
        else:
            owner = self.className
            func = caller
        self.advance()  # (
        num_args = self.compileExpressionList() + implicit_args
        self.vmcode += VMWriter.writeCall(owner + '.' + func, num_args)
        self.advance()  # )

    def compileArrayEXP(self):
        """Compile ``name[expr]`` as a value read through THAT."""
        symbolName = self.takeWord()  # varName
        symbol = self.findSymbol(symbolName)
        self.advance()  # [
        self.compileExpression()
        # base address comes from the segment the array variable lives in
        self.vmcode += VMWriter.writePush(symbol['kind'], symbol['index'])
        self.vmcode += VMWriter.writeArithmetic('+')
        self.vmcode += VMWriter.writePop("pointer", "1")
        self.vmcode += VMWriter.writePush("that", "0")
        self.advance()  # ]

    def compileExpressionList(self):
        """Compile a comma-separated expression list; return its length."""
        num_args = 0
        if self.tokens[self.count]["token"] == ")":
            return num_args
        self.compileExpression()
        num_args += 1
        while self.tokens[self.count]["token"] == ",":
            self.advance()  # ,
            self.compileExpression()
            num_args += 1
        return num_args

    def findSymbol(self, symbol):
        """Return the first entry named ``symbol``, or None.

        The subroutine scope is searched before the class scope.
        """
        for scope in (self.SubroutineScope, self.ClassScope):
            for entry in scope.symbols:
                if entry["name"] == symbol:
                    return entry
        return None
def visit_WhileInstr(self, node):
    """Visit a while instruction.

    The condition is visited in the enclosing scope; the body is visited in
    a fresh 'while' scope, after which the scope's parent is reinstated.
    """
    node.condition.accept(self)

    enclosing_scope = self.symbolTable
    self.symbolTable = SymbolTable(enclosing_scope, 'while')
    node.instruction.accept(self)

    # step back out to the scope that contained the loop
    loop_scope = self.symbolTable
    self.symbolTable = loop_scope.getParentScope()
class CompilationEngine:
    """Jack compilation engine that parses tokens and drives a VMWriter.

    Only the constructor and ``compileClass`` are visible in this part of
    the file; ``token``, ``insert``, ``compileClassVarDec`` and
    ``compileSubroutineDec`` are used here but defined elsewhere.
    """

    _MyTokenizer = None
    _MyOutputFile = None
    _MyClassToken = None
    MyVMWriter = None
    CurrentSubroutine = None
    CurrentClass = None
    MySymbolTable = None
    LabelNum = 0
    numExpressions = 0

    _classTypes = ["int", "char", "boolean"]
    _classVarDecOpenings = ["static", "field"]
    _subroutineDecOpenings = ["constructor", "function", "method"]
    _statementOpenings = ["let", "if", "while", "do", "return"]
    _operators = ["+", "-", "*", "/", "&", "|", "<", ">", "="]
    _unaryOperators = {"-": "neg", "~": "not"}
    _keywordConstants = ["null", "true", "false", "this"]

    # Class-level lists: they are never rebound per instance, so appending
    # through `self` (as compileClass does with classNames) grows one list
    # shared by every engine instance.
    subroutineNames = []
    classNames = []
    varNames = []

    def __init__(self, Tokenizer, OutputFile):
        """Bind the tokenizer and output file and reset per-class state."""
        self._MyTokenizer = Tokenizer
        self._MyOutputFile = OutputFile
        self.MyVMWriter = VMWriter(self._MyOutputFile)
        self.MySymbolTable = SymbolTable()
        self.CurrentSubroutine = None
        self.CurrentClass = None
        self.LabelNum = 0
        self.numExpressions = 0

    def compileClass(self):
        """Compile ``class Name { classVarDec* subroutineDec* }``.

        If the current token is not the keyword ``class``, a message is
        written to stderr and the method returns without compiling anything.
        """
        classText = "class"
        if self.token().text() != classText:
            # report and give up; no exception reaches the caller
            sys.stderr.write("Keyword '" + classText + "' expected\n")
            return
        self._MyClassToken = Token(None)
        # first insert: 'class'
        self.insert(self._MyClassToken, "class", Token.Keyword)
        # class name: start a fresh symbol table for this class
        self.MySymbolTable = SymbolTable()
        self.CurrentClass = self.token().text()
        self.classNames.append(self.token().text())
        self.insert(self._MyClassToken, None, Token.Identifier)
        self.insert(self._MyClassToken, "{", Token.Symbol)
        while (self._MyTokenizer.hasMoreTokens()
               and self.token().text() in self._classVarDecOpenings):
            self.compileClassVarDec(self._MyClassToken)
        while (self._MyTokenizer.hasMoreTokens()
               and self.token().type() == Token.Keyword
               and self.token().text() in self._subroutineDecOpenings):
            self.compileSubroutineDec(self._MyClassToken)
        self.insert(self._MyClassToken, "}", Token.Symbol)
        self.CurrentClass = None
def __init__(self):
    """Run the parent initialiser, then attach a new symbol table."""
    super().__init__()
    # a new SymbolTable, built with no arguments, stored on `st`
    self.st = SymbolTable()
class CompilationEngine:
    """Recursive-descent compiler from a tokenized Jack class to VM commands.

    The engine walks the token stream supplied by a ``JackTokenizer``, records
    identifiers in a ``SymbolTable`` and emits commands through a ``VMWriter``.
    Almost every method depends on the exact position of the tokenizer cursor;
    the ``# was ... / now ...`` comments record the token the cursor is
    expected to be on before and after each ``advance()``.
    """

    def __init__(self, input_stream: str, jack_tokenizer: JackTokenizer):
        """
        creates a new compilation engine with the given input and output.
        :param input_stream: path of the ``.jack`` file being compiled
        :param jack_tokenizer: tokenizer positioned on that file
        """
        self.tokenizer = jack_tokenizer
        self.tokens = jack_tokenizer.get_tokens()
        self.file_name = input_stream.replace(".jack", "")
        self.output_file_name = input_stream.replace(".jack", ".xml")
        # NOTE(review): nothing in this class writes to this file; it looks
        # like a leftover of an earlier XML-emitting version -- confirm.
        self.output_file = open(self.output_file_name, "wb")
        self.current_class_name = None
        self.root = None
        self.label_counter = 0
        self.tree = None
        # running count of identifiers per kind
        self.identifier_counter = {LOCAL: 0, ARGUMENT: 0, STATIC: 0, FIELD: 0}
        self.symbol_table = SymbolTable()
        self.VMWriter = None

    def compile(self) -> None:
        """
        compiles the jack file and closes the output file afterwards,
        even when compilation fails part-way through
        :return: None
        """
        try:
            self.tokenizer.advance()
            self.compile_class()
        finally:
            # always release the handle opened in __init__
            self.output_file.close()

    def compile_class(self) -> None:
        """
        compiles a class
        :return: None
        """
        # create VMWriter for current class
        self.VMWriter = VMWriter(self.file_name)
        # was class
        self.tokenizer.advance()
        # now name
        self.current_class_name = self.tokenizer.get_current_token()[1]
        # was name
        self.tokenizer.advance()
        # now {
        self.tokenizer.advance()
        # now class body
        while self.tokenizer.has_more_tokens():
            token_string = self.tokenizer.get_current_token()[1]
            if CompilationEngine.is_class_field(token_string):
                self.compile_class_var_declaration()
            elif CompilationEngine.is_subroutine(token_string):
                self.compile_subroutine()
            else:
                # Neither branch would consume this token, so looping again
                # could never make progress; stop instead of spinning forever.
                break
        # last "}" of end of class
        self.tokenizer.advance()

    @staticmethod
    def is_subroutine(token: str) -> bool:
        """
        method to check if token is subroutine
        :param token: string of current token
        :return: true if subroutine declaration, false otherwise
        """
        return token in ("constructor", "function", "method")

    @staticmethod
    def is_var_declare(token: str) -> bool:
        """
        :param token: string of current token
        :return: true if the token opens a local variable declaration
        """
        return token == "var"

    @staticmethod
    def is_class_field(token: str) -> bool:
        """
        method to check if token is class field
        :param token: string of current token
        :return: true if class field declaration, false otherwise
        """
        return token in ("static", "field")

    @staticmethod
    def is_statement(token: str) -> bool:
        """
        :param token: string of current token
        :return: true if the token opens a statement
        """
        return token in (LET, IF, WHILE, DO, RETURN)

    def insert_next_token(self, root) -> None:
        """
        appends the current token to the given element tree node as a child
        element and advances the tokenizer
        :param root: parent element the token element is attached to
        :return: None
        """
        current_token = self.tokenizer.get_current_token()
        token_type = current_token[0]
        token_string = current_token[1]
        if token_type == JackTokenizer.STRING_TYPE:
            # drop the first and last character (the enclosing quotes)
            token_string = token_string[1:-1]
        etree.SubElement(root, token_type).text = " " + token_string + " "
        self.tokenizer.advance()

    def _sync_to_keyword(self, keyword) -> None:
        """
        makes sure the cursor is on the statement keyword.
        compile_statements dispatches on the current token for the first
        statement and on the peeked token for the following ones, so the
        cursor is either on the keyword already or one token before it.
        :param keyword: statement keyword the caller is about to compile
        :return: None
        """
        if self.tokenizer.get_current_token()[1] != keyword:
            self.tokenizer.advance()

    def _segment_and_index(self, var_name):
        """
        looks a variable up and translates it to its VM location
        :param var_name: name of a variable known to the symbol table
        :return: tuple of (segment, index)
        """
        variable = self.get_variable_of_table(var_name)
        segment = SymbolTable.get_segment(variable[KIND])
        return segment, variable[INDEX]

    def _define_class_variable(self, name, type_var, kind) -> None:
        """
        counts one static / field variable and records it in the symbol table
        :param name: variable name
        :param type_var: declared type
        :param kind: declared kind (compared against STATIC)
        :return: None
        """
        if kind == STATIC:
            # static variable
            self.identifier_counter[STATIC] += 1
        else:
            # class field
            self.identifier_counter[FIELD] += 1
        self.symbol_table.define(name, type_var, kind)

    def compile_class_var_declaration(self) -> None:
        """
        compiles a class variable declaration
        --------------------
        (static | field) type varName (, varName)* ;
        --------------------
        :return: None
        """
        # variable kind: field | static
        kind = self.tokenizer.get_current_token()[1]
        self.tokenizer.advance()
        # variable type
        type_var = self.tokenizer.get_current_token()[1]
        self.tokenizer.advance()
        # variable name
        name = self.tokenizer.get_current_token()[1]
        self.tokenizer.advance()
        self._define_class_variable(name, type_var, kind)
        # further names share the same type and kind
        while self.tokenizer.current_word == COMMA:
            # was ,
            self.tokenizer.advance()
            # now varName
            name = self.tokenizer.get_current_token()[1]
            self._define_class_variable(name, type_var, kind)
            self.tokenizer.advance()
        # end of declaration: ;
        self.tokenizer.advance()

    def compile_subroutine(self) -> None:
        """
        compiles a complete method function or constructor
        :return: None
        """
        # restart as a new subroutine
        self.symbol_table.start_subroutine()
        # constructor | function | method
        subroutine_type = self.tokenizer.get_current_token()[1]
        if subroutine_type == METHOD:
            # a method gets the object it is called on as its first argument
            self.symbol_table.define(THIS, self.current_class_name, ARGUMENT)
        # was function type
        self.tokenizer.advance()
        # now return type
        self.tokenizer.advance()
        # now subroutine name, qualified as Class.subroutine
        subroutine_name = (self.current_class_name + DOT
                           + self.tokenizer.get_current_token()[1])
        # was name
        self.tokenizer.advance()
        # now (
        self.compile_parameter_list()
        # was )
        self.tokenizer.advance()
        # now {
        self.compile_subroutine_body(subroutine_name, subroutine_type)
        # was }
        self.tokenizer.advance()

    def compile_subroutine_body(self, subroutine_name: str,
                                subroutine_type: str):
        """
        method to compile subroutine body
        :param subroutine_name: qualified name written in the function line
        :param subroutine_type: constructor | function | method
        :return: None
        """
        # FIELD counter of the symbol table; pushed as the size handed to the
        # allocation call of a constructor
        n_fields = self.symbol_table.variable_counter[FIELD]
        var_count = 0
        # was {
        self.tokenizer.advance()
        # now subroutine body
        current_token = self.tokenizer.get_current_token()[1]
        # read all variable declares
        while CompilationEngine.is_var_declare(current_token):
            var_count = var_count + self.compile_var_declaration()
            current_token = self.tokenizer.get_current_token()[1]
        # function declare line
        self.VMWriter.write_function(subroutine_name, var_count)
        if subroutine_type == CONSTRUCTOR:
            # allocate memory for the object and make it "this"
            self.VMWriter.write_push(CONSTANT, n_fields)
            self.VMWriter.write_call(ALLOCATION_METHOD, ONE_NUM)
            self.VMWriter.write_pop(POINTER, ZERO_NUM)
        elif subroutine_type == METHOD:
            # push argument 0, pop pointer 0
            self.VMWriter.write_push(ARGUMENT, ZERO_NUM)
            self.VMWriter.write_pop(POINTER, ZERO_NUM)
        # subroutine statements
        self.compile_statements()
        # }
        self.tokenizer.advance()

    def compile_var_declaration(self) -> int:
        """
        method to compile var declaration lines
        --------------------
        var type varName (, varName)* ;
        --------------------
        :return: number of variables declared on the line
        """
        var_count = 0
        # was var kind (var)
        kind = self.tokenizer.get_current_token()[1]
        self.tokenizer.advance()
        # now type
        type_var = self.tokenizer.get_current_token()[1]
        self.tokenizer.advance()
        # now name
        name = self.tokenizer.get_current_token()[1]
        self.tokenizer.advance()
        # now , or ;
        self.symbol_table.define(name, type_var, kind)
        var_count += 1
        while self.tokenizer.current_word == COMMA:
            # was ,
            var_count += 1
            self.tokenizer.advance()
            # now name
            name = self.tokenizer.get_current_token()[1]
            self.tokenizer.advance()
            # now , or ;
            self.symbol_table.define(name, type_var, kind)
        # was ;
        self.tokenizer.advance()
        # now next line
        return var_count

    def compile_parameter_list(self) -> int:
        """
        compiles a (CAN BE EMPTY) parameter list
        not including the enclosing "()"
        :return: var count of parameter list
        """
        var_count = 0
        # was (
        self.tokenizer.advance()
        # now arguments or )
        current_token = self.tokenizer.get_current_token()[1]
        if current_token != END_OF_PARAM_LIST:
            var_count += 1
            kind = ARGUMENT
            var_type = self.tokenizer.get_current_token()[1]
            self.tokenizer.advance()
            # now var name
            name = self.tokenizer.get_current_token()[1]
            self.tokenizer.advance()
            # now , or )
            self.symbol_table.define(name, var_type, kind)
            current_token = self.tokenizer.get_current_token()[1]
            # go through the rest of the param list
            while current_token == COMMA:
                var_count += 1
                # was ,
                self.tokenizer.advance()
                # now type
                var_type = self.tokenizer.get_current_token()[1]
                self.tokenizer.advance()
                # now var name
                name = self.tokenizer.get_current_token()[1]
                self.symbol_table.define(name, var_type, kind)
                self.tokenizer.advance()
                # now comma or )
                current_token = self.tokenizer.get_current_token()[1]
        return var_count

    def compile_statements(self) -> None:
        """
        compiles a sequence of statements
        not including the enclosing {}
        :return: None
        """
        current_token = self.tokenizer.get_current_token()[1]
        if current_token == END_OF_CLASS:
            # nothing to compile
            return
        # The first statement is recognised from the current token, every
        # following one from the token after the cursor.
        peek_at_next = current_token
        while CompilationEngine.is_statement(peek_at_next):
            if peek_at_next == LET:
                self.compile_let()
            elif peek_at_next == IF:
                self.compile_if()
            elif peek_at_next == WHILE:
                self.compile_while()
            elif peek_at_next == DO:
                self.compile_do()
            elif peek_at_next == RETURN:
                self.compile_return()
            peek_at_next = self.tokenizer.peek_at_next_token()[1]

    def compile_do(self) -> None:
        """
        compiles a do statement
        --------------------
        do subroutineName ( ... ) ;
        do name . subroutineName ( ... ) ;
        --------------------
        :return: None
        """
        self._sync_to_keyword(DO)
        # was do
        self.tokenizer.advance()
        # now subroutine name, or class / variable name followed by "."
        rout_or_class_name = self.tokenizer.get_current_token()[1]
        peek_at_token = self.tokenizer.peek_at_next_token()[1]
        if peek_at_token != START_OF_PARAM_LIST:
            # step onto the "." so compile_call sees a qualified call
            self.tokenizer.advance()
        self.compile_call(rout_or_class_name)
        # now comes ;
        self.tokenizer.advance()
        # the value returned by the call is not used: pop temp 0
        self.VMWriter.write_pop(TEMP, ZERO_NUM)

    def compile_let(self) -> None:
        """
        compiles a let statement
        --------------------
        let "var_name" = "expression" ;
        let "var_name" [ "expression" ] = "expression" ;
        --------------------
        :return: None
        """
        self._sync_to_keyword(LET)
        # was let
        self.tokenizer.advance()
        # now var name
        var_name = self.tokenizer.get_current_token()[1]
        self.tokenizer.advance()
        # now = or [
        is_array = self.tokenizer.get_current_token()[1] == ARRAY_OPENER
        if is_array:
            # leaves the address of the array element on the stack
            self.calculate_memory_location(var_name)
        # were on =
        self.tokenizer.advance()
        # now on expression
        self.compile_expression()
        # after expression comes ;
        self.tokenizer.advance()
        if is_array:
            # stack holds: element address, value
            # pop temp 0
            self.VMWriter.write_pop(TEMP, ZERO_NUM)
            # pop pointer 1
            self.VMWriter.write_pop(POINTER, ONE_NUM)
            # push temp 0
            self.VMWriter.write_push(TEMP, ZERO_NUM)
            # pop that 0
            self.VMWriter.write_pop(THAT, ZERO_NUM)
        else:
            # plain variable: pop the value straight into it
            segment, var_index = self._segment_and_index(var_name)
            self.VMWriter.write_pop(segment, var_index)

    def calculate_memory_location(self, var_name):
        """
        method to push the address of an array element (base + index);
        expects the cursor on "[" and leaves it on the token after "]"
        :param var_name: name of the array variable
        :return: None
        """
        segment, var_index = self._segment_and_index(var_name)
        # was [
        self.tokenizer.advance()
        # expression inside array
        self.compile_expression()
        self.VMWriter.write_push(segment, var_index)
        # base + index
        self.VMWriter.write_arithmetic(ADD)
        # were on whats inside array
        self.tokenizer.advance()
        # now on ]
        self.tokenizer.advance()

    def compile_while(self):
        """
        compiles a while statement
        --------------------
        while ( "expression" ) { "statements }
        --------------------
        :return: None
        """
        self._sync_to_keyword(WHILE)
        # label L1
        while_label = self.label_generator()
        self.VMWriter.write_label(while_label)
        # was while
        self.tokenizer.advance()
        # was (
        self.tokenizer.advance()
        # expression of while
        self.compile_expression()
        # negate condition: ~(cond)
        self.VMWriter.write_arithmetic(BINARY_DICT["~"])
        # )
        self.tokenizer.advance()
        # if-goto L2
        after_while_label = self.label_generator()
        self.VMWriter.write_if(after_while_label)
        # {
        self.tokenizer.advance()
        # statement
        self.tokenizer.advance()
        self.compile_statements()
        # goto L1
        self.VMWriter.write_goto(while_label)
        # label L2
        self.VMWriter.write_label(after_while_label)
        # }
        self.tokenizer.advance()

    def compile_return(self) -> None:
        """
        compiles a return statement
        :return: None
        """
        self._sync_to_keyword(RETURN)
        value_to_return = self.tokenizer.peek_at_next_token()[1]
        if value_to_return == COMMA_DOT:
            # no value to return: a constant 0 is pushed instead
            self.tokenizer.advance()
            self.VMWriter.write_push(CONSTANT, ZERO_NUM)
            self.VMWriter.write_return()
            return
        # evaluate return value
        self.tokenizer.advance()
        self.compile_expression()
        self.VMWriter.write_return()
        # ;
        self.tokenizer.advance()

    def compile_if(self):
        """
        compiles an if statement
        possibly with a trailing else clause
        --------------------
        if ( "expression" ) { "statements }
        - might be else { }
        --------------------
        :return: None
        """
        self._sync_to_keyword(IF)
        else_label = self.label_generator()
        end_label = self.label_generator()
        # was if now (
        self.tokenizer.advance()
        # the parenthesised condition is compiled as one expression
        self.compile_expression()
        # negate condition: ~(cond)
        self.VMWriter.write_arithmetic(BINARY_DICT["~"])
        # )
        self.tokenizer.advance()
        # if-goto L1
        self.VMWriter.write_if(else_label)
        # {
        self.tokenizer.advance()
        # VM code for s1
        self.compile_statements()
        # goto L2
        self.VMWriter.write_goto(end_label)
        # }
        self.tokenizer.advance()
        # now we might have else:
        current_token = self.tokenizer.get_current_token()[1]
        current_peek = self.tokenizer.peek_at_next_token()[1]
        # label L1
        self.VMWriter.write_label(else_label)
        # statements 2 is else :
        if current_peek == ELSE or current_token == ELSE:
            if current_peek == ELSE:
                self.tokenizer.advance()
            # now else
            self.tokenizer.advance()
            # {
            self.tokenizer.advance()
            self.compile_statements()
            # }
            self.tokenizer.advance()
        # label L2
        self.VMWriter.write_label(end_label)

    def compile_expression(self) -> None:
        """
        compiles an expression
        --------------------
        term (op term)*
        --------------------
        each operator is written after both of its operands
        :return: None
        """
        # first term
        self.compile_term()
        peek_at_token = self.tokenizer.peek_at_next_token()[1]
        while peek_at_token in BINARY_OPERATORS:
            # onto the binary op
            self.tokenizer.advance()
            # onto the next term
            self.tokenizer.advance()
            self.compile_term()
            self.VMWriter.write_arithmetic(BINARY_DICT[peek_at_token])
            # renew again
            peek_at_token = self.tokenizer.peek_at_next_token()[1]

    def compile_term(self) -> None:
        """
        compiles a term. if the current token is an identifier
        we distinguish between
        - an array entry: next token is [
        - subroutine call: next token is ( or .
        - a variable: anything else
        :return: None
        """
        current_token = self.tokenizer.get_current_token()
        token_type = current_token[0]
        token_string = current_token[1]
        # integerConstant
        if token_type == JackTokenizer.INT_TYPE:
            self.VMWriter.write_push(CONSTANT, token_string)
        # stringConstant
        elif token_type == JackTokenizer.STRING_TYPE:
            self.construct_string(token_string)
        # keywordConstant
        elif token_type == JackTokenizer.KEYWORD_TYPE:
            if token_string == TRUE:
                # true is written as ~0
                self.VMWriter.write_push(CONSTANT, ZERO_NUM)
                self.VMWriter.write_arithmetic(BINARY_DICT["~"])
            elif token_string == FALSE:
                self.VMWriter.write_push(CONSTANT, ZERO_NUM)
            elif token_string == THIS:
                self.VMWriter.write_push(POINTER, ZERO_NUM)
            elif token_string == NULL:
                self.VMWriter.write_push(CONSTANT, ZERO_NUM)
        # unaryOperator {- , ~}
        elif token_string in UNARY_OPERATORS:
            # unary minus is looked up under "!" so that it does not collide
            # with the binary "-" entry (presumably mapped to neg -- confirm
            # against BINARY_DICT)
            if token_string == "-":
                token_string = "!"
            op = BINARY_DICT[token_string]
            self.tokenizer.advance()
            # operand first, operator after it
            self.compile_term()
            self.VMWriter.write_arithmetic(op)
        # ( -> some expression -> )
        elif token_string == START_OF_PARAM_LIST:
            # was (
            self.tokenizer.advance()
            self.compile_expression()
            # onto )
            self.tokenizer.advance()
        else:
            # was some identifier
            possibly_parent = self.tokenizer.peek_at_next_token()[1]
            if possibly_parent == ARRAY_OPENER:
                self.tokenizer.advance()
                self.array_variable(token_string)
            elif possibly_parent == START_OF_PARAM_LIST:
                # subroutine call immediately
                self.compile_call(token_string)
            elif possibly_parent == DOT:
                # onto the "." so compile_call sees a qualified call
                self.tokenizer.advance()
                self.compile_call(token_string)
            else:
                self.simple_variable(token_string)

    def simple_variable(self, var_name) -> None:
        """
        method to push simple variable
        :param var_name: var name we push
        :return: None
        """
        segment, var_index = self._segment_and_index(var_name)
        self.VMWriter.write_push(segment, var_index)

    def array_variable(self, var_name):
        """
        method to push the value of an array element;
        expects the cursor on "[" and leaves it on "]"
        :param var_name: name of the array variable
        :return: None
        """
        segment, var_index = self._segment_and_index(var_name)
        # was [
        self.tokenizer.advance()
        # expression inside []
        self.compile_expression()
        # push start of array
        self.VMWriter.write_push(segment, var_index)
        # base + index
        self.VMWriter.write_arithmetic(ADD)
        # pop pointer 1
        self.VMWriter.write_pop(POINTER, ONE_NUM)
        # push that 0
        self.VMWriter.write_push(THAT, ZERO_NUM)
        # onto ]
        self.tokenizer.advance()

    def compile_expression_list(self) -> int:
        """
        compiles (might be empty list) a comma separated
        list of expression
        :return: amount of expressions
        """
        # we are on (
        self.tokenizer.advance()
        # now we on ) or argument
        arguments_count = 0
        current_token = self.tokenizer.get_current_token()[1]
        if current_token != END_OF_PARAM_LIST:
            arguments_count += 1
            self.compile_expression()
            # step past the expression
            self.tokenizer.advance()
            current_token = self.tokenizer.get_current_token()[1]
            while current_token == COMMA:
                # was ,
                self.tokenizer.advance()
                # now new argument
                arguments_count += 1
                self.compile_expression()
                self.tokenizer.advance()
                current_token = self.tokenizer.get_current_token()[1]
        return arguments_count

    def label_generator(self) -> str:
        """
        helper method to generate new label
        :return: str of new label
        """
        label = LABEL + str(self.label_counter)
        self.label_counter += 1
        return label

    def construct_string(self, token_string):
        """
        method to build a string constant: allocates a string of the needed
        length and appends the characters one by one
        :param token_string: string token including its first and last
            character (the enclosing quotes), which are dropped
        :return: None
        """
        token_string = token_string[1:-1]
        memory_to_alloc = len(token_string)
        self.VMWriter.write_push(CONSTANT, memory_to_alloc)
        # empty string of size (memory to alloc)
        self.VMWriter.write_call(STRING_ALLOC_METHOD, ONE_NUM)
        # append the character code of every char
        for char_of_string in token_string:
            self.VMWriter.write_push(CONSTANT, ord(char_of_string))
            self.VMWriter.write_call(STRING_APPENDING, TWO_NUM)

    def compile_call(self, rout_or_class_name) -> None:
        """
        method to compile call
        :param rout_or_class_name: subroutine name (cursor on that name),
            or class / variable name (cursor on the "." after it)
        :return: None
        """
        variable = self.get_variable_of_table(rout_or_class_name)
        if variable is not None:
            # call on a variable: its type is the class, the object it
            # refers to is pushed as the first argument
            rout_or_class_name = variable[TYPE]
            subroutine_type = variable[TYPE]
            var_index = variable[INDEX]
            var_kind = SymbolTable.get_segment(variable[KIND])
            self.VMWriter.write_push(var_kind, var_index)
        else:
            subroutine_type = None
        # . or subroutine name
        current_token = self.tokenizer.get_current_token()[1]
        if current_token == DOT:
            # was .
            self.tokenizer.advance()
            # now subroutine name -> Class.Subroutine
            subroutine_name = (rout_or_class_name + DOT
                               + self.tokenizer.get_current_token()[1])
        else:
            # unqualified name: a method of the current class, called on this
            self.VMWriter.write_push(POINTER, ZERO_NUM)
            subroutine_name = self.current_class_name + DOT + rout_or_class_name
            subroutine_type = METHOD
        if subroutine_type is None or subroutine_type == VOID:
            # no object was pushed
            arguments = 0
        else:
            # the pushed object counts as an argument
            arguments = 1
        # was subroutine name
        self.tokenizer.advance()
        # now (
        arguments = arguments + self.compile_expression_list()
        # call subroutine_name arguments
        self.VMWriter.write_call(subroutine_name, arguments)

    def get_variable_of_table(self, var_name):
        """
        method to get variable of one of tables; an entry of the subroutine
        table wins over an entry of the same name in the variable table
        :param var_name: var name to get
        :return: dict of variable, or None when the name is unknown
        """
        subroutine_table = self.symbol_table.subroutine_table
        if var_name in subroutine_table:
            return subroutine_table[var_name]
        variable_table = self.symbol_table.variable_table
        if var_name in variable_table:
            return variable_table[var_name]
        return None
from Tokenizer import tokenType, tokenWrap, insideAstring
from Parser import Parser
import glob
import os

# Names treated as non-terminals (presumably the structural elements of the
# Jack grammar, as opposed to single tokens -- confirm against the parser).
nonTerminals = [ 'class', 'classVarDec', 'subroutineDec', 'parameterList', 'subroutineBody', 'varDec', 'statements', 'whileStatement', 'ifStatement', 'returnStatement', 'letStatement', 'doStatement', 'expression', 'term', 'expressionList' ]

from SymbolTable import SymbolTable, Node, LinkedList

# Module-level symbol table, created at import time and pre-seeded below.
parserSymbolTable = SymbolTable()

# OS and other functions
# Shape of the call, as noted by the original author:
#defineSubroutineTracker(self.subroutineName,'method' ,self.className,self.subroutineVoid)
# NOTE(review): the second argument is '1' / '0' here rather than a kind such
# as 'method' -- presumably an argument count; confirm against SymbolTable.
parserSymbolTable.defineSubroutineTracker('deAlloc', '1', 'OS', True) # void
parserSymbolTable.defineSubroutineTracker('keyPressed', '0', 'OS', False) # not void
parserSymbolTable.defineSubroutineTracker('wait', '1', 'OS', True) # void

# methods have k+1 because the plus one is the object 'argument 0' or 'pointer 0'
# The registrations below are disabled:
#parserSymbolTable.define('moveUp','1','OS')
#parserSymbolTable.define('moveDown','1','OS')
#parserSymbolTable.define('moveLeft','1','OS')
#parserSymbolTable.define('moveRight','1','OS')
#parserSymbolTable.define('incSize','1','OS')
#parserSymbolTable.define('dispose','1','OS')
#parserSymbolTable.define('decSize','1','OS')
# 'new' is the only name still registered through define().
parserSymbolTable.define('new', '1', 'OS')
class TypeChecker(NodeVisitor):
    """Static type checker for the matrix language AST.

    Each ``visit_*`` method returns the type of the visited node: a type
    name string, a ``Matrix``, a ``VariableSymbol`` (for variable
    references), ``BadType()`` after a reported error, or ``None``.
    Diagnostics are printed and ``self.errors`` is set when any error is
    found.  A ``Matrix`` is constructed as ``Matrix(rows, columns)`` and
    exposes them as ``dim_X`` / ``dim_Y``.
    """

    def __init__(self):
        self.symbol_table = SymbolTable(None, "global")
        self.loop_nest = 0
        self.errors = False

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _error(self, line, detail, prefix="Error in line: "):
        """Print a diagnostic for ``line`` and mark the check as failed."""
        print(prefix + str(line) + ": " + detail)
        self.errors = True

    def _fail(self, line, detail):
        """Report an error and return the ``BadType`` marker."""
        self._error(line, detail)
        return BadType()

    def _illegal_operation(self, node, left, operator, right):
        """Report an operator applied to incompatible operand types."""
        return self._fail(
            node.line,
            "illegal operation " + str(left) + " " + str(operator) + " "
            + str(right))

    @staticmethod
    def _operand_key(operand):
        """Return the ``result_types`` key for a right-hand operand.

        Variables are keyed by their declared type and matrices by the
        class name; anything else is used as-is.
        """
        if isinstance(operand, VariableSymbol):
            operand = operand.type
        if isinstance(operand, Matrix):
            return operand.__class__.__name__
        return operand

    def _visit_square_initializer(self, node, function_name):
        """Shared check for zeros/ones/eye: the size must be an int."""
        size_type = self.visit(node.expression)
        if isinstance(size_type, VariableSymbol):
            if size_type.type != 'int':
                # str(): the declared type may be a Matrix, not a string.
                return self._fail(
                    node.line,
                    "cannot initialize " + function_name + " with "
                    + str(size_type.type))
        elif size_type != 'int':
            return self._fail(
                node.line,
                "cannot initialize " + function_name
                + " with this expression")
        dim = self.get_dim(node.expression)
        return Matrix(dim, dim)

    # ------------------------------------------------------------------
    # expressions
    # ------------------------------------------------------------------
    def verify_matrices(self, operator, type_left, type_right, line):
        """Check matrix dimensions for ``operator`` and return the result type.

        Returns ``Matrix(left rows, right columns)`` on success and
        ``BadType()`` after reporting a dimension mismatch.
        """
        if operator == '*' or operator == '*=':
            if type_left.dim_Y != type_right.dim_X:
                return self._fail(
                    line,
                    "illegal operation: left matrix columns != right matrix rows.")
        elif operator == '/' or operator == '/=':
            # Division is multiplication by the inverse: the right operand
            # has to be square and compatible with the left one.
            if type_right.dim_X != type_right.dim_Y:
                return self._fail(
                    line,
                    "illegal operation: right matrix is not invertible.")
            elif type_left.dim_Y != type_right.dim_X:
                return self._fail(
                    line,
                    "illegal operation: left matrix columns != right matrix rows.")
        else:
            if (type_left.dim_X != type_right.dim_X
                    or type_left.dim_Y != type_right.dim_Y):
                return self._fail(
                    line, "illegal operation on different matrix size")
        return Matrix(type_left.dim_X, type_right.dim_Y)

    def visit_Program(self, node):
        self.visit(node.instructions)

    def visit_Instructions(self, node):
        for instruction in node.instructions:
            self.visit(instruction)

    def visit_BinaryExpression(self, node):
        """Return the result type of a binary expression."""
        type_left = self.visit(node.expression_left)
        type_right = self.visit(node.expression_right)
        operator = node.operator

        if isinstance(type_left, VariableSymbol):
            if not isinstance(type_left.type, Matrix):
                # Scalar variable on the left.
                expected_type = result_types[operator][type_left.type][
                    self._operand_key(type_right)]
                if not expected_type:
                    return self._illegal_operation(
                        node, type_left, operator, type_right)
                return expected_type

            # Matrix variable on the left.
            if operator != '*' and operator != '/':
                if (not isinstance(type_right, VariableSymbol)
                        and not isinstance(type_right, Matrix)):
                    return self._illegal_operation(
                        node, type_left, operator, type_right)
                if (not isinstance(type_right, Matrix)
                        and not isinstance(type_right.type, Matrix)):
                    return self._illegal_operation(
                        node, type_left, operator, type_right)
                if isinstance(type_right, Matrix):
                    return self.verify_matrices(
                        operator, type_left.type, type_right, node.line)
                return self.verify_matrices(
                    operator, type_left.type, type_right.type, node.line)

            if isinstance(type_right, VariableSymbol):
                return self.verify_matrices(
                    operator, type_left.type, type_right.type, node.line)
            return self.verify_matrices(
                operator, type_left.type, type_right, node.line)

        if isinstance(type_left, Matrix):
            # Matrix literal on the left.
            if isinstance(type_right, VariableSymbol):
                if not isinstance(type_right.type, Matrix):
                    return self._fail(
                        node.line,
                        "illegal operation on different matrix size")
                return self.verify_matrices(
                    operator, type_left, type_right.type, node.line)
            if isinstance(type_right, Matrix):
                return self.verify_matrices(
                    operator, type_left, type_right, node.line)
            expected_type = result_types[operator][
                type_left.__class__.__name__][type_right]
            if not expected_type:
                return self._fail(
                    node.line, "illegal operation on different matrix size")
            return expected_type

        # Scalar literal / plain type name on the left.
        expected_type = result_types[operator][type_left][
            self._operand_key(type_right)]
        if not expected_type:
            return self._illegal_operation(
                node, type_left, operator, type_right)
        return expected_type

    def visit_NegUnaryExpression(self, node):
        """Return the type of ``-expr`` (falsy when negation is invalid)."""
        operand = self.visit(node.expression)
        if isinstance(operand, VariableSymbol):
            operand = operand.type
        if isinstance(operand, str):
            result = result_types['-'][operand]
        else:
            result = result_types['-'][operand.__class__.__name__]
        if not result:
            self._error(node.line, "invalid unary negation type")
        return result

    def visit_TransUnaryExpression(self, node):
        """Return the type of ``expr'`` (falsy when transposition is invalid)."""
        operand = self.visit(node.expression)
        if isinstance(operand, VariableSymbol):
            result = result_types['\''][operand.type.__class__.__name__]
        else:
            result = result_types['\''][operand.__class__.__name__]
        if not result:
            self._error(node.line, "invalid transposition type")
        return result

    def visit_Variable(self, node):
        """Return the symbol stored for the variable, or None if unknown."""
        definition = self.symbol_table.get(node.name)
        if definition is None:
            self._error(node.line, "unknown variable")
            return None
        return definition

    def visit_Constant(self, node):
        return node.type

    def visit_CompoundInstruction(self, node):
        self.visit(node.instructions)

    # ------------------------------------------------------------------
    # assignments
    # ------------------------------------------------------------------
    def visit_Assignment(self, node):
        """Declare (or re-declare) the target with the expression's type."""
        value_type = self.visit(node.expression)
        if isinstance(node.variable, MatrixElement):
            var = self.symbol_table.get(node.variable.variable)
            if var is None:
                self._error(node.line, "no matrix with that name",
                            prefix="Error in line ")
            else:
                self.visit(node.variable)
            return

        var = self.symbol_table.get(node.variable.name)
        if var is not None and str(var) != str(value_type):
            print("Warning in line " + str(node.line)
                  + ": previously declared variable, type: " + str(var)
                  + " now reassigning with type: " + str(value_type))
        # ``x = y`` yields y's symbol; store the underlying type so that a
        # symbol's ``type`` is always a type name or a Matrix.
        if isinstance(value_type, VariableSymbol):
            stored_type = value_type.type
        else:
            stored_type = value_type
        self.symbol_table.put(node.variable.name,
                              VariableSymbol(node.variable.name, stored_type))
        self.visit(node.variable)

    def visit_CompoundAssignment(self, node):
        """Type-check ``var op= expr`` and return the resulting type."""
        expression = self.visit(node.expression)
        operator = node.operator
        if isinstance(node.variable, MatrixElement):
            var = self.symbol_table.get(node.variable.variable)
            if var is None:
                self._error(node.line, "no matrix with that name",
                            prefix="Error in line ")
            else:
                self.visit(node.variable)
            return None

        variable = self.symbol_table.get(node.variable.name)
        if variable is None:
            # A compound assignment needs an already declared target.
            return self._fail(node.line, "unknown variable")

        if not isinstance(variable.type, Matrix):
            if isinstance(expression, VariableSymbol):
                expected_type = result_types[operator][variable.type][
                    expression.type]
            else:
                expected_type = result_types[operator][variable.type][
                    expression]
            if not expected_type:
                return self._illegal_operation(
                    node, variable, operator, expression)
            return expected_type

        if not isinstance(expression, VariableSymbol):
            expected_type = result_types[operator][
                variable.type.__class__.__name__][expression]
            if not expected_type:
                return self._illegal_operation(
                    node, variable, operator, expression)
            return expected_type

        if not isinstance(expression.type, Matrix):
            return self._illegal_operation(
                node, variable, operator, expression)
        return self.verify_matrices(operator, variable.type,
                                    expression.type, node.line)

    def visit_MatrixElement(self, node):
        """Validate ``matrix[row, column]``; returns BadType on a bad index."""
        row_type = self.visit(node.row)
        column_type = self.visit(node.column)
        if not (row_type == 'int' and column_type == 'int'):
            return self._fail(node.line, "index is not int")

        symbol = self.symbol_table.get(node.variable)
        if isinstance(symbol, VariableSymbol) and isinstance(symbol.type,
                                                             Matrix):
            matrix = symbol.type
        elif isinstance(symbol, Matrix):
            matrix = symbol
        else:
            self._error(node.line, "this is not a matrix")
            return None

        # dim_X is the row count and dim_Y the column count.  Only constant
        # indices carry a ``value`` that can be checked statically.
        row_out = (isinstance(node.row, Constant)
                   and node.row.value >= matrix.dim_X)
        column_out = (isinstance(node.column, Constant)
                      and node.column.value >= matrix.dim_Y)
        if row_out or column_out:
            return self._fail(node.line, "index out of bound")
        return None

    def visit_ListsOfExpressions(self, node):
        """Return the Matrix type of a literal; all rows must be equally long."""
        size = -1
        for expression_list in node.expression_lists:
            next_size = self.visit(expression_list)
            if size == -1:
                size = next_size
            if size != next_size:
                return self._fail(
                    node.line,
                    "Different rows size " + str(size) + " and "
                    + str(next_size))
        return Matrix(len(node.expression_lists), size)

    def visit_MatrixAssignment(self, node):
        """Declare the target as a matrix built from a literal."""
        var = self.symbol_table.get(node.variable.name)
        if var is not None:
            print("Warning in line " + str(node.line)
                  + ": previously declared variable, now reassigning with type: "
                  + str(Matrix.__name__))
        matrix = self.visit(node.expression_list)
        self.symbol_table.put(node.variable.name,
                              VariableSymbol(node.variable.name, matrix))

    def visit_PrintExpression(self, node):
        for expression in node.expression_list:
            self.visit(expression)

    def visit_ListOfExpressions(self, node):
        """Visit every element and return the number of elements."""
        for expression in node.expression_list:
            self.visit(expression)
        return len(node.expression_list)

    def visit_PrintInstructions(self, node):
        self.visit(node.expressions_list)

    def visit_ZerosInitialization(self, node):
        return self._visit_square_initializer(node, "zeros")

    def visit_OnesInitialization(self, node):
        return self._visit_square_initializer(node, "ones")

    def visit_EyeInitialization(self, node):
        return self._visit_square_initializer(node, "eye")

    # ------------------------------------------------------------------
    # control flow
    # ------------------------------------------------------------------
    def visit_BreakInstruction(self, node):
        if self.loop_nest <= 0:
            self._error(node.line, "break outside the loop")
        return None

    def visit_ContinueInstruction(self, node):
        if self.loop_nest <= 0:
            self._error(node.line, "continue outside the loop")
        return None

    def visit_IfInstruction(self, node):
        self.visit(node.condition)
        self.symbol_table = SymbolTable(self.symbol_table, "if")
        self.visit(node.instruction)
        self.symbol_table = self.symbol_table.getParentScope()

    def visit_IfElseInstruction(self, node):
        self.visit(node.condition)
        self.symbol_table = SymbolTable(self.symbol_table, "if")
        self.visit(node.instruction)
        self.symbol_table = self.symbol_table.getParentScope()
        self.symbol_table = SymbolTable(self.symbol_table, "else")
        self.visit(node.else_instruction)
        self.symbol_table = self.symbol_table.getParentScope()

    def visit_WhileInstruction(self, node):
        self.loop_nest = self.loop_nest + 1
        self.symbol_table = SymbolTable(self.symbol_table,
                                        'while' + str(self.loop_nest))
        self.visit(node.condition)
        self.visit(node.instruction)
        self.symbol_table = self.symbol_table.getParentScope()
        self.loop_nest = self.loop_nest - 1

    def visit_ForInstruction(self, node):
        """Check both range bounds are int and visit the body in a new scope."""
        self.loop_nest = self.loop_nest + 1
        self.symbol_table = SymbolTable(self.symbol_table,
                                        'for' + str(self.loop_nest))
        bound_type = self.visit(node.start)
        if str(bound_type) != 'int':
            self._error(node.line, "invalid range type: " + str(bound_type))
        bound_type = self.visit(node.end)
        if str(bound_type) != 'int':
            self._error(node.line, "invalid range type: " + str(bound_type))
        # The loop variable takes the type of the upper bound.
        self.symbol_table.put(node.variable.name, bound_type)
        self.visit(node.instruction)
        self.symbol_table = self.symbol_table.getParentScope()
        self.loop_nest = self.loop_nest - 1

    def visit_ReturnInstruction(self, node):
        """Return the type of the returned expression; reject global returns."""
        returned_type = self.visit(node.expression)
        if self.symbol_table.getParentScope() is None:
            return self._fail(node.line, "return in outer of scope")
        return returned_type

    def get_dim(self, val):
        """Return the dimension denoted by a size node.

        Constants give their value, variables give their name, ints are
        returned unchanged; anything else yields None.
        """
        if isinstance(val, Constant):
            return val.value
        elif isinstance(val, Variable):
            return val.name
        elif isinstance(val, int):
            return val
def visit_CompoundInstr(self, node):
    """Visit a compound instruction inside its own nested "inner" scope.

    The declarations are visited first, then the optional instructions;
    afterwards the enclosing scope becomes current again.
    """
    inner_scope = SymbolTable(self.symbol_table, "inner")
    self.symbol_table = inner_scope
    for part in (node.declarations, node.instructions_opt):
        self.visit(part)
    self.symbol_table = self.symbol_table.getParentScope()
def __init__(self):
    """Start with an empty global scope, no enclosing loop and no errors."""
    self.symbol_table = SymbolTable(None, "global")
    self.loop_nest, self.errors = 0, False
class TypeChecker(object):
    """Type checker that walks the AST through ``node.accept(self)``.

    ``self.ttype[operator][left][right]`` holds the result type of a binary
    operation; a missing entry (``KeyError``) means the operation is not
    allowed.  The pseudo operator ``'f'`` describes which expression type
    (second key) may be assigned or passed to a slot of the declared type
    (first key).  Every problem is reported through :meth:`error`, which
    also sets ``self.errorsOcurred``.
    """

    def __init__(self):
        self.errorsOcurred = False
        operators = ['+', '-', '*', '/', '%', '|', '&', '^', '&&', '||',
                     '<<', '>>', '==', '!=', '<', '>', '<=', '>=', 'f']
        types = ['int', 'float', 'string']
        self.ttype = dict((op, dict((name, {}) for name in types))
                          for op in operators)

        mixed_numeric = (('int', 'float'), ('float', 'int'),
                         ('float', 'float'))

        # Arithmetic: int only when both operands are int.
        for op in ('+', '-', '*', '/'):
            self.ttype[op]['int']['int'] = 'int'
            for left, right in mixed_numeric:
                self.ttype[op][left][right] = 'float'
        self.ttype['+']['string']['string'] = 'string'

        # Modulo, bitwise, logical and shift operators work on ints only.
        for op in ('%', '|', '&', '^', '&&', '||', '<<', '>>'):
            self.ttype[op]['int']['int'] = 'int'

        # Comparisons always produce an int.
        comparable = (('int', 'int'),) + mixed_numeric + (
            ('string', 'string'),)
        for op in ('==', '!=', '<', '>', '<=', '>='):
            for left, right in comparable:
                self.ttype[op][left][right] = 'int'

        # Assignment / argument passing compatibility.
        self.ttype['f']['string']['string'] = 'string'
        self.ttype['f']['int']['int'] = 'int'
        self.ttype['f']['float']['float'] = 'float'
        self.ttype['f']['float']['int'] = 'float'

    def error(self, text, line):
        """Print an error banner for ``line`` and remember that it happened."""
        self.errorsOcurred = True
        # Single-argument print() behaves the same on Python 2 and 3.
        print("********************************")
        print("Error: " + text)
        print("Line " + str(line))
        print("********************************")

    def visit_Program(self, node):
        """Check the whole program inside a fresh 'main' scope."""
        try:
            self.symbolTable = SymbolTable(None, 'main')
            node.declarations.accept(self)
            node.fundefs.accept(self)
            node.instructions.accept(self)
        except Exception:
            # Best effort: earlier errors can leave the tree in a state the
            # checker cannot walk any further.
            self.error("could not continue parsing, correct errors first", 0)

    def visit_Declarations(self, node):
        for element in node.list:
            element.accept(self)

    def visit_Declaration(self, node):
        """Declare every initialised symbol with the declared type."""
        declType = node.type
        for exprType, ident in node.inits.accept(self):
            if self.symbolTable.get(ident.value) is not None:
                self.error("Symbol: " + ident.value
                           + ", was previusly declared", ident.line)
            try:
                self.ttype['f'][declType][exprType]
            except (KeyError, TypeError):
                self.error("cannot initialize symbol of type: "
                           + str(declType) + ", with expression of type: "
                           + str(exprType), ident.line)
            # The symbol has the declared type, whatever the initialiser is.
            self.symbolTable.put(ident.value, declType)

    def visit_Inits(self, node):
        """Return a list of ``[expression type, id node]`` pairs."""
        return [element.accept(self) for element in node.list]

    def visit_Init(self, node):
        return [node.expression.accept(self), node.id]

    def visit_Instructions(self, node):
        self.symbolTable = SymbolTable(self.symbolTable, 'instructions')
        for element in node.list:
            element.accept(self)
        self.symbolTable = self.symbolTable.getParentScope()

    def visit_PrintInstr(self, node):
        if node.expression.accept(self) not in ['string', 'int', 'float']:
            self.error("cannot print expression of that type", node.line)

    def visit_LabeledInstr(self, node):
        node.instruction.accept(self)

    def visit_Assignment(self, node):
        """Check that the expression may be assigned to a known symbol."""
        if self.symbolTable.getIncludingParents(node.id.value) is None:
            self.error("unknown symbol name: " + node.id.value, node.id.line)
            # Still check the right-hand side for its own errors.
            node.expression.accept(self)
            return
        idType = node.id.accept(self)
        exprType = node.expression.accept(self)
        try:
            self.ttype['f'][idType][exprType]
        except (KeyError, TypeError):
            self.error("cannot assign " + str(exprType) + " to "
                       + str(idType), node.id.line)

    def visit_ChoiceInstr(self, node):
        node.condition.accept(self)
        node.instruction.accept(self)
        if node.elseInstruction is not None:
            node.elseInstruction.accept(self)

    def visit_Break(self, node):
        pass

    def visit_Continue(self, node):
        pass

    def visit_WhileInstr(self, node):
        node.condition.accept(self)
        self.symbolTable = SymbolTable(self.symbolTable, 'while')
        node.instruction.accept(self)
        self.symbolTable = self.symbolTable.getParentScope()

    def visit_RepeatInstr(self, node):
        node.instructions.accept(self)
        node.condition.accept(self)

    def visit_ReturnInstr(self, node):
        # TODO: compare with the enclosing function's return type.
        node.expression.accept(self)

    def visit_CompoundInstr(self, node):
        # No scope of its own: declarations land in the enclosing scope.
        node.declarations.accept(self)
        node.instructions.accept(self)

    def visit_Condition(self, node):
        if node.expression.accept(self) != 'int':
            self.error("condition must be of int type", node.line)

    def visit_Integer(self, node):
        return 'int'

    def visit_Float(self, node):
        return 'float'

    def visit_String(self, node):
        return 'string'

    def visit_Id(self, node):
        """Return the symbol stored for the identifier, or None if undefined."""
        symbol = self.symbolTable.getIncludingParents(node.value)
        if symbol:
            return symbol
        self.error("undefined symbol: " + node.value, node.line)

    def visit_ParExpr(self, node):
        return node.expression.accept(self)

    def visit_BinExpr(self, node):
        """Return the result type of a binary expression, or None on error."""
        operator = node.operator
        first = node.first.accept(self)
        second = node.second.accept(self)
        try:
            return self.ttype[operator][first][second]
        except (KeyError, TypeError):
            # str(): an operand type is None when it already failed.
            self.error("cannot compute operation: " + str(operator)
                       + ",on arguments: " + str(first) + ", " + str(second),
                       node.first.line)

    def visit_FunExpr(self, node):
        """Check the arguments of a call and return the function's type."""
        funSymbol = self.symbolTable.getIncludingParents(node.id.value)
        if not isinstance(funSymbol, FunSymbol):
            for argument in node.expressionList.list:
                argument.accept(self)
            self.error("call of undefined function: " + str(node.id.value),
                       node.line)
            return None
        for index, argument in enumerate(node.expressionList.list):
            givenArgType = argument.accept(self)
            try:
                self.ttype['f'][funSymbol.argList[index]][givenArgType]
            except (KeyError, IndexError, TypeError):
                self.error("bad argument in funcall", node.line)
        return funSymbol.type

    def visit_ExprList(self, node):
        return [element.accept(self) for element in node.list]

    def visit_FunDefs(self, node):
        for element in node.list:
            element.accept(self)

    def visit_FunDef(self, node):
        """Register the function in the outer scope and check its body."""
        self.symbolTable = SymbolTable(self.symbolTable, node.id.value)
        # Visiting the arguments declares them in the function's own scope.
        argTypes = [arg.accept(self) for arg in node.argList.list]
        self.symbolTable.getParentScope().put(
            node.id.value, FunSymbol(node.type, node.id.value, argTypes))
        node.compoundInstr.accept(self)
        self.symbolTable = self.symbolTable.getParentScope()

    def visit_ArgList(self, node):
        return [element.accept(self) for element in node.list]

    def visit_Arg(self, node):
        """Declare the argument in the current scope and return its type."""
        self.symbolTable.put(node.id.value, node.type)
        return node.type
def scan(self, filename):
    """Tokenize ``filename`` and write the PIF and both symbol tables.

    Every line is split on whitespace and then on any character that is
    not a letter, digit or double quote.  Adjacent ``=``/``<``/``>``
    pieces are merged into the operators ``==``, ``=/=``, ``<=`` and
    ``>=``.  Each token is recorded in the PIF as a reserved
    word/separator, an identifier or a constant; anything else is
    collected as a lexical error.  The results go to ``PIF.out``,
    ``ST_constants.out`` and ``ST_identifiers.out``; the tokens of every
    line and the final verdict are printed.
    """
    symbolTableIdentifiers = SymbolTable(37)
    symbolTableConstants = SymbolTable(37)
    pif = PIF()

    # Read everything up front so the handle is closed even if scanning
    # fails half way through.
    with open(filename, 'r') as source:
        lines = source.readlines()

    # The capturing group keeps the separators themselves as tokens.
    splitter = re.compile('(' + "[^a-zA-Z0-9\"]" + ')')
    # Multi-character operators that arrive as single-character pieces.
    compound_operators = (
        (["=", "="], "=="),
        (["=", "/", "="], "=/="),
        (["<", "="], "<="),
        ([">", "="], ">="),
    )

    error = []
    for count, line in enumerate(lines):
        tokens = []
        for chunk in line.split():
            tokens.extend(splitter.split(chunk))
        # re.split leaves empty strings around adjacent separators.
        tokens = [token for token in tokens if token]

        i = 0
        while i < len(tokens):
            j = i + 1
            for pieces, merged in compound_operators:
                if tokens[i:i + len(pieces)] == pieces:
                    # Merge in place; the absorbed pieces are skipped.
                    tokens[i] = merged
                    j = i + len(pieces)
                    break

            token = tokens[i]
            if token in self.__reservedWordsAndSeparators:
                pif.genPIF(token, (0, 0))
            elif self.checkIfIdentifier(token):
                pif.genPIF(self.__identifierCode,
                           symbolTableIdentifiers.position(token))
            elif (self.checkIfNumberConstant(token)
                  or self.checkIfStringConstant(token)):
                pif.genPIF(self.__constantCode,
                           symbolTableConstants.position(token))
            else:
                error.append("error on line " + str(count) + " for token "
                             + token)
            i = j
        print("Line{}: {}".format(count, tokens))

    with open('PIF.out', 'w') as f:
        print('PIF:\n', pif.getPIF(), file=f)
    with open('ST_constants.out', 'w') as f:
        print('ST_constants:\n', str(symbolTableConstants), file=f)
    with open('ST_identifiers.out', 'w') as f:
        print('ST_identifiers:\n', str(symbolTableIdentifiers), file=f)

    if not error:
        print("Lexically correct.")
    else:
        print(error)
class TypeChecker(NodeVisitor):
    """Type checker that prints diagnostics while visiting the AST.

    ``self.ttypes[operator][left][right]`` gives the result type of a
    binary operation; combinations that were never registered yield
    ``None``.  Scopes are tracked with ``self.symbol_table`` through
    ``push_scope`` / ``pop_scope``.
    """

    def __init__(self):
        self.symbol_table = SymbolTable(None, "TypeChecker", {})
        self.ttypes = defaultdict(
            lambda: defaultdict(lambda: defaultdict(lambda: None)))
        self.fill_ttypes()

    def fill_ttypes(self):
        """Register every supported operator / operand type combination."""
        arithmetic = ('+', '-', '*', '/', '%')
        bitwise = ('&', '|', '^', '<<', '>>')
        relational = ('==', '!=', '<', '>', '<=', '>=')
        # Any operation involving a float produces a float.
        float_pairs = (('float', 'float'), ('int', 'float'),
                       ('float', 'int'))

        for op in arithmetic + relational:
            self.add_ttype(op, 'int', 'int', 'int')
            for left, right in float_pairs:
                self.add_ttype(op, left, right, 'float')
        for op in bitwise:
            self.add_ttype(op, 'int', 'int', 'int')

        # string operations
        self.add_ttype('+', 'string', 'string', 'string')
        self.add_ttype('*', 'string', 'int', 'string')
        for op in relational:
            self.add_ttype(op, 'string', 'string', 'string')

    def add_ttype(self, operation, operand1, operand2, returned):
        self.ttypes[operation][operand1][operand2] = returned

    def visit_Name(self, node):
        return node.name

    def visit_CheckedName(self, node):
        """Return the name, reporting it when it is not declared."""
        if not self.symbol_table.get(node.name):
            print("Error: Usage of undeclared variable '%s': line %s"
                  % (node.name, node.lineno))
        return node.name

    def visit_Operator(self, node):
        return node.op

    def visit_Integer(self, node):
        return 'int'

    def visit_Float(self, node):
        return 'float'

    def visit_String(self, node):
        return 'string'

    def visit_Program(self, node):
        self.symbol_table = self.symbol_table.push_scope("Program")
        for item in node.body:
            self.visit(item)
        self.symbol_table = self.symbol_table.pop_scope()

    def visit_Declaration(self, node):
        """Declare each initialised name unless it clashes in this scope."""
        for init in node.inits:
            name = self.visit(init.name)
            var = self.symbol_table.get_declared_var(name)
            if var:
                if isinstance(var, FunctionDefSymbol):
                    print("Error: Function identifier '%s' used as a variable: line %d"
                          % (name, node.lineno))
                else:
                    print("Error: Variable '%s' already declared: line %d"
                          % (name, node.lineno))
            else:
                self.symbol_table.put(
                    name, VariableSymbol(init.name, node.var_type))
                self.visit(init)

    def visit_Initializer(self, node):
        expression_ret_type = self.get_return_type(node.expression)
        declared_type = self.get_return_type(node.name)
        if expression_ret_type and declared_type:
            TypeChecker.check_type_consistency(node, declared_type,
                                               expression_ret_type)

    def visit_PrintInstr(self, node):
        for item in node.expr_list:
            self.visit(item)

    def visit_LabeledInstr(self, node):
        self.visit(node.instruction)

    def visit_Assignment(self, node):
        declared_type = self.get_return_type(node.target)
        expression_ret_type = self.get_return_type(node.value)
        if not declared_type:
            print("Error: Variable '%s' undefined in current scope: line %d"
                  % (self.visit(node.target), node.lineno))
        elif expression_ret_type:
            TypeChecker.check_type_consistency(node, declared_type,
                                               expression_ret_type)

    def visit_IfInstr(self, node):
        self.visit(node.condition)
        self.visit(node.body)
        if node.else_body:
            self.visit(node.else_body)

    def visit_WhileInstr(self, node):
        self.visit(node.condition)
        # Restore the previous flag instead of clearing it, so finishing a
        # nested loop does not mark the enclosing loop body as "outside".
        was_inside_loop = self.symbol_table.is_inside_loop()
        self.symbol_table.set_inside_loop(1)
        self.visit(node.body)
        self.symbol_table.set_inside_loop(1 if was_inside_loop else 0)

    def visit_RepeatInstr(self, node):
        was_inside_loop = self.symbol_table.is_inside_loop()
        self.symbol_table.set_inside_loop(1)
        for item in node.body:
            self.visit(item)
        self.visit(node.condition)
        self.symbol_table.set_inside_loop(1 if was_inside_loop else 0)

    def visit_ReturnInstr(self, node):
        """Check the returned type against the enclosing function."""
        ret_type = self.get_return_type(node.expression)
        scope = self.symbol_table
        while scope and scope.name != "FunctionDef":
            scope = scope.get_parent_scope()
        if not scope:
            print("Error: return instruction outside a function: line %s"
                  % node.lineno)
            return
        fun_def = scope.get(scope.function_name)
        if not fun_def:
            # should not happen...
            print("something bad happened while parsing or checking")
            return
        function_ret_type = fun_def.type.name
        scope.set_return_present(1)
        if ret_type and function_ret_type:
            if function_ret_type == 'int' and ret_type == 'float':
                print("Warning: Possible loss of precision: returning %s from function returning %s: line %s"
                      % (ret_type, function_ret_type, node.lineno))
            elif function_ret_type == 'float' and ret_type == 'int':
                pass  # widening int -> float is fine
            elif ret_type != function_ret_type:
                print("Error: Improper returned type, expected %s, got %s: line %s"
                      % (function_ret_type, ret_type, node.lineno))

    def visit_ContinueInstr(self, node):
        if not self.symbol_table.is_inside_loop():
            print("Error: continue instruction outside a loop: line %s"
                  % node.lineno)

    def visit_BreakInstr(self, node):
        if not self.symbol_table.is_inside_loop():
            print("Error: break instruction outside a loop: line %s"
                  % node.lineno)

    def visit_CompoundInstr(self, node):
        self.symbol_table = self.symbol_table.push_scope("CompoundInstr")
        for item in node.declarations:
            self.visit(item)
        for item in node.instructions:
            self.visit(item)
        self.symbol_table = self.symbol_table.pop_scope()

    def _operand_type(self, operand, lineno):
        """Return the operand's type, reporting an undeclared plain name."""
        operand_type = self.get_return_type(operand)
        # A missing type on any other node was already reported while that
        # node was visited; visiting it again would repeat the message.
        if not operand_type and isinstance(operand, AST.Name):
            print("Error: Usage of undeclared variable '%s': line %s"
                  % (self.visit(operand), lineno))
        return operand_type

    def visit_BinaryExpr(self, node):
        """Return the result type of the expression, or None on error."""
        type_1 = self._operand_type(node.left, node.lineno)
        type_2 = self._operand_type(node.right, node.lineno)
        op = self.visit(node.op)
        if not type_1 or not type_2:
            # An operand is already known to be broken: do not pile an
            # "Illegal operation" message on top of the original error.
            return None
        ret = self.ttypes[op][type_1][type_2]
        if not ret:
            print("Error: Illegal operation, %s %s %s: line %s"
                  % (type_1, op, type_2, node.left.lineno))
        return ret

    def visit_MethodCallExpr(self, node):
        """Check a call against the definition and return its return type."""
        name = self.visit(node.name)
        fun_def = self.symbol_table.get(name)
        # A variable with that name is not callable either.
        if not isinstance(fun_def, FunctionDefSymbol):
            print("Error: Call of undefined function: '%s': line %s"
                  % (name, node.lineno))
            return None
        if len(fun_def.args) != len(node.args):
            print("Error: Improper number of args in %s call: line %s"
                  % (fun_def.name.name, node.lineno))
        else:
            for fun_arg, call_arg in zip(fun_def.args, node.args):
                fun_arg_type = fun_arg.arg_type.name
                call_arg_type = self.get_return_type(call_arg)
                if not (fun_arg_type and call_arg_type):
                    continue
                if fun_arg_type == 'int' and call_arg_type == 'float':
                    print("Warning: Possible loss of precision: passing %s instead of %s: line %s"
                          % (call_arg_type, fun_arg_type, node.lineno))
                elif fun_arg_type == 'float' and call_arg_type == 'int':
                    pass  # widening int -> float is fine
                elif call_arg_type != fun_arg_type:
                    print("Error: Improper type of args in %s call: line %s"
                          % (name, node.lineno))
                    break
        return fun_def.type.name

    def visit_FunctionDef(self, node):
        """Register the function, then check its body in a new scope."""
        name = self.visit(node.name)
        if self.symbol_table.get(name):
            print("Error: Redefinition of function '%s': line %s"
                  % (name, node.lineno))
        self.symbol_table.put(
            name, FunctionDefSymbol(node.name, node.return_type, node.args))
        self.symbol_table = self.symbol_table.push_scope("FunctionDef")
        # The function scope holds the symbol too: visit_ReturnInstr looks
        # it up there through ``function_name``.
        self.symbol_table.put(
            name, FunctionDefSymbol(node.name, node.return_type, node.args))
        self.symbol_table.function_name = name
        for arg in node.args:
            self.visit(arg)
        self.symbol_table.set_return_present(0)
        self.visit(node.body)
        if not self.symbol_table.get_return_present():
            print("Error: Missing return statement in function '%s' returning %s: line %s"
                  % (name, self.visit(node.return_type), node.lineno))
        self.symbol_table = self.symbol_table.pop_scope()

    def visit_Argument(self, node):
        name = self.visit(node.name)
        if self.symbol_table.name == "FunctionDef":
            self.symbol_table.put(name,
                                  VariableSymbol(node.name, node.arg_type))

    def get_return_type(self, node):
        """Return the type name of ``node``.

        Plain names are resolved through the symbol table (None when
        undeclared); every other node is visited.
        """
        if isinstance(node, AST.Name):
            var = self.symbol_table.get(node.name)
            return None if not var else var.type.name
        return self.visit(node)

    @staticmethod
    def check_type_consistency(node, declared_type, expression_ret_type):
        """Warn about float -> int narrowing; reject other mismatches."""
        if declared_type == 'int' and expression_ret_type == 'float':
            print("Warning: Possible loss of precision: assignment of %s to %s: line %s"
                  % (expression_ret_type, declared_type, node.lineno))
        elif declared_type == 'float' and expression_ret_type == 'int':
            pass  # widening int -> float is fine
        elif declared_type != expression_ret_type:
            print("Error: Assignment of %s to %s: line %s"
                  % (expression_ret_type, declared_type, node.lineno))
class CompilationEngine:
    """Recursive-descent compiler that turns one class's token stream into VM code.

    Tokens are pulled from ``tokenizer``, identifiers are resolved through a
    ``SymbolTable`` and VM commands are emitted through ``vm_writer``.  The
    grammar (``let``/``do``/``field``, ``Memory.alloc``, ``String.appendChar``)
    appears to be the Jack language.  After construction the next routine
    called must be :meth:`compileClass`.
    """

    # Token categories as reported by ``tokenizer.tokenType()``.
    KEYWORD = 'KEYWORD'
    SYMBOL = 'SYMBOL'
    IDENTIFIER = 'IDENTIFIER'
    INT_CONST = 'INT_CONST'
    STRING_CONST = 'STRING_CONST'

    # Keywords.
    CLASS = 'class'
    METHOD = 'method'
    FUNCTION = 'function'
    CONSTRUCTOR = 'constructor'
    INT = 'int'
    BOOLEAN = 'boolean'
    CHAR = 'char'
    VOID = 'void'
    VAR = 'var'
    STATIC = 'static'
    FIELD = 'field'
    LET = 'let'
    DO = 'do'
    IF = 'if'
    ELSE = 'else'
    WHILE = 'while'
    RETURN = 'return'
    TRUE = 'true'
    FALSE = 'false'
    NULL = 'null'
    THIS = 'this'

    # Symbols (names kept as-is for backward compatibility).
    SEMICOLAN = ';'
    L_PARENTHESES = '('
    R_PARENTHESES = ')'
    L_BRACKET = '['
    R_BRACKET = ']'

    # Binary operators accepted between two terms of an expression.
    op_set = {'+', '-', '*', '/', '&', '|', '<', '>', '='}

    def __init__(self, tokenizer, output_stream, vm_writer):
        """Store the collaborators and position the tokenizer on the first token.

        ``output_stream`` is accepted for interface compatibility but is not
        used; all output goes through ``vm_writer``.
        """
        print('CompilationEngine is initializing')
        self.tokenizer = tokenizer
        self.symboltable = SymbolTable()
        self.vm_writer = vm_writer
        self.label = 0
        # Move to the first token in the stream.
        self.tokenizer.advance()

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------
    def _at_symbol(self, *symbols):
        """Return True when the current token is a SYMBOL contained in ``symbols``."""
        return (self.tokenizer.tokenType() == self.SYMBOL
                and self.tokenizer.symbol() in symbols)

    def _at_keyword(self, keywords):
        """Return True when the current token is a KEYWORD contained in ``keywords``."""
        return (self.tokenizer.tokenType() == self.KEYWORD
                and self.tokenizer.keyWord() in keywords)

    def _segment_of(self, name):
        """Return the VM segment for ``name``; the 'field' kind maps to 'this'."""
        segment = self.symboltable.kindOf(name)
        return 'this' if segment == 'field' else segment

    def _push_variable(self, name):
        """Emit a push of the variable ``name`` from its segment and index."""
        self.vm_writer.writePush(self._segment_of(name),
                                 self.symboltable.indexOf(name))

    # ------------------------------------------------------------------
    # Token consumption
    # ------------------------------------------------------------------
    def produceLabel(self):
        """Return a fresh label of the form ``LABEL_<n>`` and bump the counter."""
        label = 'LABEL_' + str(self.label)
        self.label += 1
        return label

    def eat_token_type(self, tok_type):
        """Consume and return the current token if its type is ``tok_type``.

        Raises:
            Exception: when the current token has a different type.
        """
        tok = self.tokenizer.current_token
        if self.tokenizer.tokenType() != tok_type:
            raise Exception(
                'Expected a token of type {} but found {}'.format(tok_type, tok))
        self.tokenizer.advance()
        return tok

    def eat_token(self, tok_type, valid_set):
        """Consume and return the current token if it has ``tok_type`` and is in ``valid_set``.

        Raises:
            Exception: when the type or the value does not match.
        """
        tok = self.tokenizer.current_token
        if self.tokenizer.tokenType() == tok_type and tok in valid_set:
            self.tokenizer.advance()
            return tok
        # The set must be rendered explicitly: concatenating it to a str
        # raised TypeError and hid the real diagnostic.
        expected = ', '.join(sorted(str(item) for item in valid_set))
        raise Exception(
            'Expected a token of type {} from the set: {{{}}} but found {}'
            .format(tok_type, expected, tok))

    # ------------------------------------------------------------------
    # Program structure
    # ------------------------------------------------------------------
    def compileClass(self):
        """Compile ``class Name { classVarDec* subroutineDec* }`` and close the writer."""
        self.eat_token(self.KEYWORD, {self.CLASS})
        self.tokenizer.class_name = self.eat_token_type(self.IDENTIFIER)
        self.eat_token(self.SYMBOL, {'{'})
        # classVarDec*
        while self._at_keyword({self.STATIC, self.FIELD}):
            self.compileClassVarDec()
        # subroutineDec*
        while self._at_keyword({self.CONSTRUCTOR, self.FUNCTION, self.METHOD}):
            self.compileSubroutine()
        self.eat_token(self.SYMBOL, {'}'})
        self.vm_writer.close()
        return

    def compileVoidOrType(self):
        """Consume ``void`` or a type (primitive keyword or class-name identifier)."""
        if self._at_keyword({self.INT, self.CHAR, self.BOOLEAN, self.VOID}):
            self.tokenizer.advance()
        elif self.tokenizer.tokenType() == self.IDENTIFIER:
            self.tokenizer.advance()
        else:
            raise Exception('expected int, char, boolean, or className')

    def compileType(self):
        """Consume a type token and return it (primitive keyword or class name)."""
        curr_tok = self.tokenizer.current_token
        tok_type = self.tokenizer.tokenType()
        is_primitive = (tok_type == self.KEYWORD
                        and curr_tok in {self.INT, self.CHAR, self.BOOLEAN})
        if not is_primitive and tok_type != self.IDENTIFIER:
            raise Exception('expected int, char, boolean, or className')
        self.tokenizer.advance()
        return curr_tok

    def compileClassVarDec(self):
        """Compile ``(static|field) type name (, name)* ;`` into symbol-table entries."""
        identifier_kind = self.eat_token(self.KEYWORD, {self.STATIC, self.FIELD})
        identifier_type = self.compileType()
        identifier_name = self.eat_token_type(self.IDENTIFIER)
        # Fourth argument is False for class-level names (True is used for
        # subroutine-level names elsewhere in this class).
        self.symboltable.define(identifier_name, identifier_type,
                                identifier_kind, False)
        while self._at_symbol(','):
            self.tokenizer.advance()
            identifier_name = self.eat_token_type(self.IDENTIFIER)
            self.symboltable.define(identifier_name, identifier_type,
                                    identifier_kind, False)
        self.eat_token(self.SYMBOL, {';'})
        return

    def compileSubroutine(self):
        """Compile one constructor, function or method declaration with its body."""
        # Reset the subroutine-level symbol table.
        self.symboltable.startSubroutine()
        subroutine_type = self.eat_token(
            self.KEYWORD, {self.CONSTRUCTOR, self.FUNCTION, self.METHOD})
        if subroutine_type == self.METHOD:
            # A method receives the object as its first argument.
            self.symboltable.define('this', self.tokenizer.class_name,
                                    'argument', True)
        self.compileVoidOrType()  # ('void' | type)
        subroutine_identifier = self.eat_token_type(self.IDENTIFIER)
        self.eat_token(self.SYMBOL, {'('})
        self.compileParameterList()
        self.eat_token(self.SYMBOL, {')'})
        # subroutineBody
        self.eat_token(self.SYMBOL, {'{'})
        # varDec*
        while self._at_keyword({self.VAR}):
            self.compileVarDec()
        # The local count is known only now, so the declaration is written here.
        self.vm_writer.writeFunction(
            self.tokenizer.class_name + '.' + subroutine_identifier,
            self.symboltable.count_map['local'])
        if subroutine_type == self.CONSTRUCTOR:
            # Allocate one word per field and anchor 'this' at the new block.
            self.vm_writer.writePush('constant',
                                     self.symboltable.count_map['field'])
            self.vm_writer.writeCall('Memory.alloc', 1)
            self.vm_writer.writePop('pointer', 0)
        if subroutine_type == self.METHOD:
            # Anchor 'this' at the object passed as argument 0.
            self.vm_writer.writePush('argument', 0)
            self.vm_writer.writePop('pointer', 0)
        self.compileStatements()
        self.eat_token(self.SYMBOL, {'}'})
        return

    def compileParameterList(self):
        """Compile a possibly empty ``type name (, type name)*`` parameter list."""
        if self._at_symbol(')'):
            return  # empty list
        identifier_type = self.compileType()
        identifier_name = self.eat_token_type(self.IDENTIFIER)
        self.symboltable.define(identifier_name, identifier_type,
                                'argument', True)
        while self._at_symbol(','):
            self.tokenizer.advance()
            identifier_type = self.compileType()
            identifier_name = self.eat_token_type(self.IDENTIFIER)
            self.symboltable.define(identifier_name, identifier_type,
                                    'argument', True)
        return

    def compileVarDec(self):
        """Compile ``var type name (, name)* ;`` into 'local' symbol-table entries."""
        self.eat_token(self.KEYWORD, {self.VAR})
        identifier_kind = 'local'
        identifier_type = self.compileType()
        identifier_name = self.eat_token_type(self.IDENTIFIER)
        self.symboltable.define(identifier_name, identifier_type,
                                identifier_kind, True)
        while self._at_symbol(','):
            self.tokenizer.advance()
            identifier_name = self.eat_token_type(self.IDENTIFIER)
            self.symboltable.define(identifier_name, identifier_type,
                                    identifier_kind, True)
        self.eat_token(self.SYMBOL, {';'})
        return

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------
    def compileStatements(self):
        """Compile consecutive let/if/while/do/return statements."""
        handlers = {
            self.LET: self.compileLet,
            self.IF: self.compileIf,
            self.WHILE: self.compileWhile,
            self.DO: self.compileDo,
            self.RETURN: self.compileReturn,
        }
        while self._at_keyword(handlers):
            handlers[self.tokenizer.current_token]()
        return

    def compileDo(self):
        """Compile ``do subroutineCall ;`` and discard the returned value."""
        self.eat_token(self.KEYWORD, {self.DO})
        initial_identifier = self.eat_token_type(self.IDENTIFIER)
        is_method_call = False
        if self.tokenizer.current_token not in {'.', '('}:
            raise Exception(
                'Excepted either a period or a left parentheses but instead: '
                + self.tokenizer.current_token)
        if self._at_symbol('.'):
            # varName.subroutine() or className.subroutine()
            self.tokenizer.advance()
            dotId = self.eat_token_type(self.IDENTIFIER)
            if self.symboltable.contains(initial_identifier):
                # Known variable: method call on that object.
                is_method_call = True
                self._push_variable(initial_identifier)
                full_call_identifier = (
                    self.symboltable.typeOf(initial_identifier) + '.' + dotId)
            else:
                # Unknown name is taken to be a class name.
                full_call_identifier = initial_identifier + '.' + dotId
        elif self._at_symbol('('):
            # subroutine(): method call on the current object.
            is_method_call = True
            self.vm_writer.writePush('pointer', 0)
            full_call_identifier = (
                self.tokenizer.class_name + '.' + initial_identifier)
        self.eat_token(self.SYMBOL, {'('})
        n_parameters = self.compileExpressionList()
        self.eat_token(self.SYMBOL, {')'})
        if is_method_call:
            # Account for the object pushed as the hidden first argument.
            n_parameters += 1
        self.eat_token(self.SYMBOL, {';'})
        self.vm_writer.writeCall(full_call_identifier, n_parameters)
        # A do-statement ignores the call's result.
        self.vm_writer.writePop('temp', 0)
        return

    def compileLet(self):
        """Compile ``let name ([expr])? = expr ;``."""
        self.eat_token(self.KEYWORD, {self.LET})
        lhs_var = self.eat_token_type(self.IDENTIFIER)
        is_array = False
        if self._at_symbol('['):
            # Array element target: leave (base + index) on the stack.
            is_array = True
            self.tokenizer.advance()
            self._push_variable(lhs_var)
            self.compileExpression()
            self.vm_writer.writeArithmetic('+', True)
            self.eat_token(self.SYMBOL, {']'})
        self.eat_token(self.SYMBOL, {'='})
        self.compileExpression()
        self.eat_token(self.SYMBOL, {';'})
        if is_array:
            # Park the value in temp 0 while 'that' is aimed at the element;
            # the right-hand side may itself have used pointer 1.
            self.vm_writer.writePop('temp', 0)
            self.vm_writer.writePop('pointer', 1)
            self.vm_writer.writePush('temp', 0)
            self.vm_writer.writePop('that', 0)
        else:
            self.vm_writer.writePop(self._segment_of(lhs_var),
                                    self.symboltable.indexOf(lhs_var))
        return

    def compileWhile(self):
        """Compile ``while (expr) { statements }``."""
        self.eat_token(self.KEYWORD, {self.WHILE})
        loop_label = self.produceLabel()
        exit_label = self.produceLabel()
        self.vm_writer.writeLabel(loop_label)
        self.eat_token(self.SYMBOL, {'('})
        self.compileExpression()
        self.eat_token(self.SYMBOL, {')'})
        # Negate the condition so that a false condition jumps to the exit.
        self.vm_writer.writeArithmetic('~', False)
        self.vm_writer.writeIf(exit_label)
        self.eat_token(self.SYMBOL, {'{'})
        self.compileStatements()
        self.eat_token(self.SYMBOL, {'}'})
        self.vm_writer.writeGoto(loop_label)
        self.vm_writer.writeLabel(exit_label)
        return

    def compileReturn(self):
        """Compile ``return expr? ;``; a bare return pushes constant 0 first."""
        self.eat_token(self.KEYWORD, {self.RETURN})
        has_value = self.tokenizer.current_token != ';'
        if has_value:
            self.compileExpression()
        self.eat_token(self.SYMBOL, {';'})
        if not has_value:
            self.vm_writer.writePush('constant', 0)
        self.vm_writer.writeReturn()
        return

    def compileIf(self):
        """Compile ``if (expr) { statements } (else { statements })?``."""
        self.eat_token(self.KEYWORD, {self.IF})
        else_label = self.produceLabel()
        end_label = self.produceLabel()  # only emitted when there is an else
        self.eat_token(self.SYMBOL, {'('})
        self.compileExpression()
        self.eat_token(self.SYMBOL, {')'})
        # Negate the condition so that a false condition skips the if-body.
        self.vm_writer.writeArithmetic('~', False)
        self.vm_writer.writeIf(else_label)
        self.eat_token(self.SYMBOL, {'{'})
        self.compileStatements()
        self.eat_token(self.SYMBOL, {'}'})
        if self._at_keyword({self.ELSE}):
            self.vm_writer.writeGoto(end_label)
            self.vm_writer.writeLabel(else_label)
            self.tokenizer.advance()
            self.eat_token(self.SYMBOL, {'{'})
            self.compileStatements()
            self.eat_token(self.SYMBOL, {'}'})
            self.vm_writer.writeLabel(end_label)
        else:
            self.vm_writer.writeLabel(else_label)
        return

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------
    def compileExpression(self):
        """Compile ``term (op term)*``; operators apply left to right, no precedence."""
        self.compileTerm()
        while (self.tokenizer.tokenType() == self.SYMBOL
               and self.tokenizer.symbol() in self.op_set):
            operator = self.tokenizer.current_token
            self.tokenizer.advance()
            self.compileTerm()
            self.vm_writer.writeArithmetic(operator, True)
        return

    def compileTerm(self):
        """Compile a single term and leave its value on the VM stack.

        Raises:
            Exception: when the current token cannot start a term.
        """
        token_type = self.tokenizer.tokenType()

        # integerConstant
        if token_type == self.INT_CONST:
            self.vm_writer.writePush('constant', self.tokenizer.current_token)
            self.tokenizer.advance()
            return

        # stringConstant: build the string object one character at a time.
        if token_type == self.STRING_CONST:
            text = self.tokenizer.current_token
            self.vm_writer.writePush('constant', len(text))
            self.vm_writer.writeCall('String.new', 1)
            for char in text:
                self.vm_writer.writePush('constant', ord(char))
                self.vm_writer.writeCall('String.appendChar', 2)
            self.tokenizer.advance()
            return

        # keywordConstant
        if self._at_keyword({self.TRUE, self.FALSE, self.NULL, self.THIS}):
            keyword_constant = self.tokenizer.current_token
            if keyword_constant == self.TRUE:
                # true is emitted as constant 1 followed by unary '-'.
                self.vm_writer.writePush('constant', 1)
                self.vm_writer.writeArithmetic('-', False)
            if keyword_constant in {self.FALSE, self.NULL}:
                self.vm_writer.writePush('constant', 0)
            if keyword_constant == self.THIS:
                self.vm_writer.writePush('pointer', 0)
            self.tokenizer.advance()
            return

        # varName | varName[expr] | subroutine() | (className|varName).subroutine()
        if token_type == self.IDENTIFIER:
            main_identifier = self.tokenizer.current_token
            self.tokenizer.advance()

            # varName[expression]
            if self._at_symbol('['):
                self.tokenizer.advance()
                self._push_variable(main_identifier)
                self.compileExpression()
                self.eat_token(self.SYMBOL, {']'})
                self.vm_writer.writeArithmetic('+', True)
                self.vm_writer.writePop('pointer', 1)
                self.vm_writer.writePush('that', 0)
                return

            # subroutineName(): method call on the current object.  Mirrors
            # compileDo: push the object, qualify the name with the class and
            # count the object as an extra argument.
            if self._at_symbol('('):
                self.tokenizer.advance()
                self.vm_writer.writePush('pointer', 0)
                n_parameters = self.compileExpressionList()
                self.eat_token(self.SYMBOL, {')'})
                self.vm_writer.writeCall(
                    self.tokenizer.class_name + '.' + main_identifier,
                    n_parameters + 1)
                return

            # className.subroutine() | varName.subroutine()
            if self._at_symbol('.'):
                self.tokenizer.advance()
                is_var_name = self.symboltable.contains(main_identifier)
                dotId = self.eat_token_type(self.IDENTIFIER)
                if is_var_name:
                    # Method call: the object is the hidden first argument.
                    self._push_variable(main_identifier)
                    main_identifier = (
                        self.symboltable.typeOf(main_identifier) + '.' + dotId)
                else:
                    main_identifier = main_identifier + '.' + dotId
                self.eat_token(self.SYMBOL, {'('})
                n_parameters = self.compileExpressionList()
                self.eat_token(self.SYMBOL, {')'})
                if is_var_name:
                    n_parameters += 1
                self.vm_writer.writeCall(main_identifier, n_parameters)
                return

            # Plain variable reference.
            self._push_variable(main_identifier)
            return

        # (expression)
        if self._at_symbol('('):
            self.tokenizer.advance()
            self.compileExpression()
            self.eat_token(self.SYMBOL, {')'})
            return

        # unaryOp term
        if self._at_symbol('-', '~'):
            operator = self.tokenizer.current_token
            self.tokenizer.advance()
            self.compileTerm()
            self.vm_writer.writeArithmetic(operator, False)
            return

        raise Exception('end of the term and nothing was found')

    def compileExpressionList(self):
        """Compile a possibly empty comma-separated expression list; return its length."""
        num_expressions = 0
        if self._at_symbol(')'):
            return num_expressions
        self.compileExpression()
        num_expressions += 1
        while self._at_symbol(','):
            self.tokenizer.advance()
            self.compileExpression()
            num_expressions += 1
        return num_expressions
class TypeChecker(NodeVisitor):
    """AST visitor that infers expression types and reports type errors.

    Types are plain strings: ``'int'``, ``'float'``, ``'string'``, or a
    matrix type written as dimensions joined by ``'x'`` and ending with the
    element type (for example ``'3xint'``).  Problems are printed and
    recorded by setting ``errorOccured``.
    """

    errorOccured = False
    symbolTable = SymbolTable()
    WRONG_MATRIX = "wrong_matrix"
    WRONG_REF = "wrong_ref"

    # returnedType[left][right][op] -> result type, 'err' when not allowed.
    # Plain loops are used because a nested comprehension cannot see names
    # bound in the class body.
    returnedType = {'int': {}, 'float': {}, 'string': {}}
    for i in returnedType.keys():
        returnedType[i] = {}
        for j in returnedType.keys():
            returnedType[i][j] = {}
            for k in ['+', '-', '/', '*', '%']:
                returnedType[i][j][k] = 'err'

    returnedType['int']['float']['+'] = 'float'
    returnedType['int']['int']['+'] = 'int'
    returnedType['float']['float']['+'] = 'float'
    returnedType['float']['int']['+'] = 'float'
    returnedType['string']['string']['+'] = 'string'

    returnedType['int']['float']['-'] = 'float'
    returnedType['int']['int']['-'] = 'int'
    returnedType['float']['float']['-'] = 'float'
    returnedType['float']['int']['-'] = 'float'

    returnedType['int']['float']['*'] = 'float'
    returnedType['int']['int']['*'] = 'int'
    returnedType['float']['float']['*'] = 'float'
    returnedType['float']['int']['*'] = 'float'
    returnedType['string']['int']['*'] = 'string'

    returnedType['int']['float']['/'] = 'float'
    returnedType['int']['int']['/'] = 'int'
    returnedType['float']['float']['/'] = 'float'
    returnedType['float']['int']['/'] = 'float'

    returnedType['int']['int']['%'] = 'int'

    # returnedTypeRelative[left][right] -> 'int' when comparable, else 'err'.
    returnedTypeRelative = {'int': {}, 'float': {}, 'string': {}}
    for i in returnedTypeRelative.keys():
        returnedTypeRelative[i] = {}
        for j in returnedTypeRelative.keys():
            returnedTypeRelative[i][j] = 'err'

    returnedTypeRelative['int']['float'] = 'int'
    returnedTypeRelative['int']['int'] = 'int'
    returnedTypeRelative['float']['float'] = 'int'
    returnedTypeRelative['float']['int'] = 'int'
    returnedTypeRelative['string']['string'] = 'int'

    def _result_type(self, left, right, op):
        """Look up ``left op right``; unknown types or operators yield 'err'.

        Using ``get`` keeps matrix types, ``'none'`` and unexpected operators
        from raising ``KeyError`` so they are reported as type errors.
        """
        return self.returnedType.get(left, {}).get(right, {}).get(op, 'err')

    def visit_Program(self, node):
        """Check every instruction of the program."""
        self.visit(node.insts)

    def visit_Instructions(self, node):
        """Visit each instruction in order."""
        for instruction in node.instrs:
            self.visit(instruction)

    def visit_Print(self, node):
        """Type-check the printed expressions; their types are not restricted."""
        array_to_print = self.visit(node.expr)
        for elem in array_to_print:
            if elem not in ('int', 'float', 'string'):
                # NOTE: reporting non-primitive operands was deliberately
                # left disabled in the original code.
                pass

    def visit_Assignment(self, node):
        """Check ``target = expr`` and compound assignments ``target op= expr``."""
        target_type = self.visit(node.id)
        # Only the first element of the visited operator is used
        # (presumably the operator character of '=', '+=', ...).
        oper = self.visit(node.oper)[0]
        expr = self.visit(node.expr)
        if oper == '=':
            if isinstance(node.id, AST.Ref):
                # Assignment into a matrix element: the types must agree.
                if target_type != self.WRONG_REF and target_type != expr:
                    print(
                        'Error: MISMATCH in types in matrix reference assignment: left:',
                        target_type, 'right:', expr)
                    self.errorOccured = True
            elif expr == self.WRONG_MATRIX:
                print(" - ", node.id)
            else:
                # Plain assignment (re)declares the variable with the new type.
                self.symbolTable.put(str(node.id), expr)
        elif target_type == "none":
            print("Error:", node.id, oper, "\b=", node.expr,
                  "\tused without previous assignment to variable", node.id)
            self.errorOccured = True
        elif self._result_type(target_type, expr, oper) == 'err':
            print("Error: MISMATCH TYPE in", node.id, oper, "\b=", expr,
                  "\b. Previous type was", target_type)
            self.errorOccured = True

    def visit_Ref(self, node):
        """Check an indexed matrix reference and return the referenced type.

        Returns ``WRONG_REF`` when there are too many indices or an index is
        out of range; otherwise the type left after dropping one dimension
        per index.
        """
        ref_type = self.visit(node.id)
        type_parts = re.sub("[x]", " ", ref_type).split()
        # Everything but the trailing element type is a dimension size.
        id_sizes = [int(size) for size in type_parts[:-1]]
        ref_sizes = [index.value for index in node.vector.exprs]
        if len(id_sizes) < len(ref_sizes):
            print('Error:\tmatrix wrong reference. Real size:', id_sizes,
                  'while referenced to:', ref_sizes)
            self.errorOccured = True
            return self.WRONG_REF
        for index, size in zip(ref_sizes, id_sizes):
            if index >= size:
                print('Error:\tmatrix wrong reference. Real size:', id_sizes,
                      'while referenced to:', ref_sizes)
                self.errorOccured = True
                return self.WRONG_REF
        return 'x'.join(type_parts[len(ref_sizes):])

    def visit_Assign_operator(self, node):
        """Return the operator text."""
        return node.oper

    def visit_Vector(self, node):
        """Return ``'<len>x<type>'`` for a homogeneous vector, else ``WRONG_MATRIX``."""
        elem_type = self.visit(node.expressions)
        first_type = elem_type[0]
        if any(elem != first_type for elem in elem_type):
            print("Error: Incompatible types in matrix:", elem_type, end='')
            self.errorOccured = True
            return self.WRONG_MATRIX
        return str(len(node.expressions.exprs)) + 'x' + first_type

    def visit_Expressions(self, node):
        """Return the list of types of the contained expressions."""
        return [self.visit(expr) for expr in node.exprs]

    def visit_Choice(self, node):
        """Check an if/else inside its own scope."""
        self.symbolTable.pushScope()
        self.visit(node.cond)
        self.visit(node.inst1)
        if node.inst2 is not None:
            self.visit(node.inst2)
        self.symbolTable.popScope()

    def visit_While(self, node):
        """Check a while loop; the body is visited with the loop marker pushed."""
        self.symbolTable.pushScope()
        self.visit(node.cond)
        self.symbolTable.pushLoop()
        self.visit(node.stmt)
        self.symbolTable.popLoop()
        self.symbolTable.popScope()

    def visit_For(self, node):
        """Check a for loop; the loop variable is declared as 'int'."""
        self.symbolTable.pushScope()
        self.symbolTable.put(str(node.id), 'int')
        self.visit(node.range)
        self.symbolTable.pushLoop()
        self.visit(node.inst)
        self.symbolTable.popLoop()
        self.symbolTable.popScope()

    def visit_Range(self, node):
        """Require both range bounds, when known to the symbol table, to be 'int'."""
        from_type = self.symbolTable.get(node.range_from)
        if from_type not in ('none', 'int'):
            print("Range from should evaluate to int")
            self.errorOccured = True
        to_type = self.symbolTable.get(node.range_to)
        if to_type not in ('none', 'int'):
            print("Range to should evaluate to int")
            self.errorOccured = True

    def visit_Return(self, node):
        """Type-check the returned expression."""
        self.visit(node.ret)

    def visit_Continue(self, node):
        """Report ``continue`` used outside a loop."""
        if self.symbolTable.loop <= 0:
            print("Continue used outside loop")
            self.errorOccured = True

    def visit_Break(self, node):
        """Report ``break`` used outside a loop."""
        if self.symbolTable.loop <= 0:
            print("Break used outside loop")
            self.errorOccured = True

    def visit_ComInstructions(self, node):
        """Check a compound instruction block inside its own scope."""
        self.symbolTable.pushScope()
        self.visit(node.instrs)
        self.symbolTable.popScope()

    def visit_Const(self, node):
        """Return the type name of a literal, or None for other value types."""
        # Exact type match on purpose: bool must not be classified as 'int'.
        return {str: 'string', int: 'int', float: 'float'}.get(type(node.value))

    def visit_BinExpr(self, node):
        """Return the result type of a binary expression, reporting mismatches."""
        type1 = self.visit(node.left)
        type2 = self.visit(node.right)
        op = node.op
        primitives = ('int', 'float', 'string')
        if type1 not in primitives or type2 not in primitives:
            # Non-primitive operands are only accepted when both types match.
            if type1 != type2:
                print("Error: TYPE MISMATCH IN BIN EXPR", type1, op, type2,
                      " here: ", node.left, op, node.right)
                self.errorOccured = True
            return 'wrong_bin_expr'
        result = self._result_type(type1, type2, op)
        if result == 'err':
            print("Error: TYPE MISMATCH IN BIN EXPR", type1, op, type2,
                  " here: ", node.left, op, node.right)
            self.errorOccured = True
        return result

    def visit_Condition(self, node):
        """Check that both sides of a comparison are comparable."""
        type1 = self.visit(node.left)
        type2 = self.visit(node.right)
        # Types missing from the table are treated as a mismatch, not a crash.
        result = self.returnedTypeRelative.get(type1, {}).get(type2, 'err')
        if result == 'err':
            print("Error: TYPE MISMATCH IN CONDITION:", type1, node.op, type2,
                  'here:', node.left, node.op, node.right)
            self.errorOccured = True
        if result != 'int':
            print("Error: CONDITION MUST BE INT")
            self.errorOccured = True

    def visit_Variable(self, node):
        """Return the recorded type of the variable."""
        return self.symbolTable.get(str(node.ID))

    def visit_Matrix_operation(self, node):
        """Require both operands of an element-wise matrix operation to share a type."""
        type1 = self.symbolTable.get(str(node.matrix1))
        type2 = self.symbolTable.get(str(node.matrix2))
        if type1 != type2:
            print('Error: matrix operation on incompatible types:', type1,
                  type2, 'here:', node.matrix1, node.dot_oper, node.matrix2)
            self.errorOccured = True

    def visit_Dot_operation(self, node):
        """Return the dot-operator text."""
        return node.dot_oper

    def visit_Matrix(self, node):
        """Check the wrapped matrix expression."""
        self.visit(node.matrix)

    def visit_Matrix_transposed(self, node):
        """Check the operand of a transposition."""
        self.visit(node.id)

    def visit_Minus_transposed(self, node):
        """Check the operand of a negated transposition."""
        self.visit(node.id)

    def visit_Matrix_function(self, node):
        """Return the int-matrix type built from the function's dimension arguments."""
        return ''.join(str(dimension) + 'x'
                       for dimension in node.arg.exprs) + 'int'
class SemanticAnalyser(HelloVisitor):
    """Parse-tree visitor performing scope and type checks.

    State is held in class-level attributes: the current symbol table and
    the type table.  Semantic errors are raised as ``Exception``.
    """

    current_symbol_table = SymbolTable(parent=None)
    type_table = TypeTable
    # Slots 1-3 of the type table are pre-filled with primitive types.
    type_table.table[1] = PrimitiveType()
    type_table.table[2] = PrimitiveType()
    type_table.table[3] = PrimitiveType()

    @staticmethod
    def unicode_to_str(unicode_str):
        """Convert a unicode string to its ASCII form.

        :param unicode_str: string in format u'__any_string__'
        :return: NFKD-normalised text encoded as ASCII, with characters that
            have no ASCII form dropped
        """
        return unicodedata.normalize('NFKD', unicode_str).encode(
            'ascii', 'ignore')

    def visitProgram(self, ctx):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#simpleDeclaration.
    def visitSimpleDeclaration(self, ctx):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#variableDeclaration.
    def visitVariableDeclaration(self, ctx):
        """Check a variable declaration and register it in the current scope.

        Raises:
            Exception: on redefinition in the same scope, or when the
                declared type differs from the initialiser's type.
        """
        children = ctx.children
        identifier = self.unicode_to_str(ctx.Identifier().getText())
        lang_type = self.visitChildren(ctx)
        expression = ctx.expression()

        # More than four children means an explicit type is present.
        if len(children) > 4:
            lang_type = self.visitLang_type(children[3])
        final_type = lang_type

        if self.current_symbol_table.is_defined_in_current_scope(identifier):
            raise Exception(
                'Variable {} is already defined'.format(identifier))

        if lang_type is None:
            # 'var' Identifier 'is' expression: deduce the type.
            final_type = self.visitExpression(expression)
        elif expression is not None:
            # 'var' Identifier ':' lang_type 'is' expression: types must agree.
            expression_type = self.visitExpression(expression)
            if lang_type != expression_type:
                raise Exception(
                    'Incompatible types in variable declaration {} '.format(
                        identifier))

        self.current_symbol_table.add_variable(identifier, final_type)

    # Visit a parse tree produced by HelloParser#typeDeclaration.
    def visitTypeDeclaration(self, ctx):
        """Register ``identifier`` as an alias for the declared type."""
        identifier = self.unicode_to_str(ctx.Identifier().getText())
        current_type = self.visitLang_type(ctx)
        AliasType.table[identifier] = current_type

    # Visit a parse tree produced by HelloParser#lang_type.
    def visitLang_type(self, ctx):
        """Resolve a type context to a type id, following aliases."""
        children = ctx.children

        # Type declaration that aliases an existing alias.
        if (len(children) > 3 and hasattr(children[3], 'Identifier')
                and children[3].Identifier() is not None):
            identifier = self.unicode_to_str(
                children[3].Identifier().getText())
            return AliasType.table[identifier]

        # A single child naming a known alias.  The same converted name is
        # used for the membership test and the lookup, matching how
        # visitTypeDeclaration stores aliases.
        if len(children) == 1:
            alias_name = self.unicode_to_str(children[0].getText())
            if alias_name in AliasType.table:
                return AliasType.table[alias_name]

        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#primitiveType.
    def visitPrimitiveType(self, ctx):
        """Return the type code of the named primitive type."""
        p_text = self.unicode_to_str(ctx.children[0].getText())
        return PrimitiveType.types[p_text]

    # Visit a parse tree produced by HelloParser#userType.
    def visitUserType(self, ctx):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#recordType.
    def visitRecordType(self, ctx):
        """Create a record type from its field declarations and return its id."""
        # Temporary scope used only while the record's fields are collected.
        self.current_symbol_table = self.current_symbol_table.create_child_scope(
            'current_record')

        record_variables = {}
        for child in ctx.children:
            if type(child) == HelloParser.VariableDeclarationContext:
                var_name = self.unicode_to_str(child.children[1].getText())
                record_variables[var_name] = self.visitLang_type(child)

        new_type = RecordType(record_variables)

        # Records have no scope of their own: step back out and drop the
        # temporary one.
        self.current_symbol_table = self.current_symbol_table.parent_scope
        self.current_symbol_table.remove_child_scope('current_record')
        return new_type.get_id()

    # Visit a parse tree produced by HelloParser#arrayType.
    def visitArrayType(self, ctx):
        """Create an array type and return its id.

        Raises:
            Exception: when the size expression is not an integer.
        """
        nested_type = self.visitChildren(ctx)
        new_type = ArrayType(nested_type)
        # The third child is the size expression.
        size_expression = ctx.children[2]
        if self.visitExpression(size_expression) != PrimitiveType.integer:
            raise Exception('Array size can only be integer')
        return new_type.get_id()

    # Visit a parse tree produced by HelloParser#statement.
    def visitStatement(self, ctx):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#assignment.
    def visitAssignment(self, ctx):
        """Check that the right-hand side may be assigned to the left-hand side.

        Raises:
            Exception: when the two types are incompatible.
        """
        lhs_type = self.visitModifiablePrimary(ctx.modifiablePrimary())
        rhs_type = self.visitExpression(ctx.expression())

        if TypeTable.get_type_name(lhs_type) == 'ArrayType':
            # Array target: the value must match the element type.
            if TypeTable.table[lhs_type].nested_type_id != rhs_type:
                raise Exception(
                    'Cannot assign {} to array with elements of type {}'.
                    format(rhs_type, lhs_type))
            return self.visitChildren(ctx)
        if not TypeUtils.are_compatible_for_assignment(lhs_type, rhs_type):
            raise Exception(
                'Types {} and {} are not compatible for assignment'.format(
                    TypeTable.get_type_name(lhs_type),
                    TypeTable.get_type_name(rhs_type)))

    # Visit a parse tree produced by HelloParser#routineCall.
    def visitRoutineCall(self, ctx):
        """Check a routine call and return the routine's return type.

        Raises:
            Exception: when the routine is undefined, the argument count is
                wrong, or an argument type is incompatible.
        """
        routine_name = self.unicode_to_str(ctx.Identifier().getText())

        # Check existence before touching the routine info, so an unknown
        # routine yields this message and not a lookup failure.
        if not self.current_symbol_table.routine_defined_in_scope(
                routine_name):
            raise Exception('Routine {} is not defined'.format(routine_name))

        routine_info = self.current_symbol_table.get_routine_info(routine_name)
        return_type = routine_info.return_type
        # Routines declared without parameters are stored with None.
        routine_parameters = routine_info.parameters or []

        arguments = [child for child in ctx.children
                     if type(child) == HelloParser.ExpressionContext]

        if len(routine_parameters) != len(arguments):
            raise Exception(
                "Wrong number of arguments in routine call {}".format(
                    routine_name))

        for parameter_type, argument in zip(routine_parameters, arguments):
            argument_type = self.visitExpression(argument)
            if not TypeUtils.are_compatible_for_assignment(parameter_type,
                                                           argument_type):
                raise Exception(
                    'Parameter of type {} and argument of type {} are not compatible in {} routine call'
                    .format(TypeTable.get_type_name(parameter_type),
                            TypeTable.get_type_name(argument_type),
                            routine_name))
        return return_type

    # Visit a parse tree produced by HelloParser#whileLoop.
    def visitWhileLoop(self, ctx):
        """Visit a while loop inside a fresh child scope."""
        self.current_symbol_table = self.current_symbol_table.create_child_scope(
            self.current_symbol_table.get_new_inner_scope_name())
        self.visitChildren(ctx)
        self.current_symbol_table = self.current_symbol_table.parent_scope

    # Visit a parse tree produced by HelloParser#forLoop.
    def visitForLoop(self, ctx):
        """Visit a for loop in a fresh scope holding the integer loop variable."""
        self.current_symbol_table = self.current_symbol_table.create_child_scope(
            self.current_symbol_table.get_new_inner_scope_name())
        identifier = self.unicode_to_str(ctx.Identifier().getText())
        self.current_symbol_table.add_variable(identifier,
                                               PrimitiveType.integer)
        self.visitChildren(ctx)
        self.current_symbol_table = self.current_symbol_table.parent_scope

    # Visit a parse tree produced by HelloParser#lang_range.
    def visitLang_range(self, ctx):
        """Require both range boundaries to be integers.

        Raises:
            Exception: when either boundary is not an integer.
        """
        children = ctx.children
        # children[0] and children[2] are the start and end expressions.
        start_type = self.visitExpression(children[0])
        end_type = self.visitExpression(children[2])
        if (start_type != PrimitiveType.integer
                or end_type != PrimitiveType.integer):
            raise Exception('Range boundaries are not integer numbers')

    # Visit a parse tree produced by HelloParser#ifStatement.
    def visitIfStatement(self, ctx):
        """Check an if statement; each branch gets its own scope.

        Raises:
            Exception: when the condition is not boolean.
        """
        children = ctx.children
        condition = children[1]

        # Scope for the 'if' branch.
        self.current_symbol_table = self.current_symbol_table.create_child_scope(
            self.current_symbol_table.get_new_inner_scope_name())
        if self.visitExpression(condition) != PrimitiveType.boolean:
            raise Exception("Condition of if statement is not boolean")
        self.visitBody(children[3])
        self.current_symbol_table = self.current_symbol_table.parent_scope

        # More than five children means an else branch is present.
        if len(children) > 5:
            self.current_symbol_table = self.current_symbol_table.create_child_scope(
                self.current_symbol_table.get_new_inner_scope_name())
            self.visitBody(children[5])
            self.current_symbol_table = self.current_symbol_table.parent_scope

    # Visit a parse tree produced by HelloParser#routineDeclaration.
    def visitRoutineDeclaration(self, ctx):
        """Check a routine declaration and register it in the enclosing scope.

        Raises:
            Exception: on redefinition, on a return type without a return
                expression (or the reverse), or when the returned
                expression's type differs from the declared return type.
        """
        identifier = self.unicode_to_str(ctx.Identifier().getText())
        routine_parameters = ctx.parameters()
        routine_return_type = ctx.lang_type()
        return_expression = ctx.expression()
        body = ctx.body()

        if self.current_symbol_table.routine_defined_in_scope(identifier):
            raise Exception('Routine {} is already defined'.format(identifier))

        # The routine's own scope is named after the routine.
        self.current_symbol_table = self.current_symbol_table.create_child_scope(
            identifier)

        if routine_parameters is not None:
            # Parameter declarations sit at the odd child positions.
            parameters_list = []
            for declaration in routine_parameters.children[1::2]:
                _, parameter_type = self.visitParameterDeclaration(declaration)
                parameters_list.append(parameter_type)
        else:
            parameters_list = None

        # The return type and the return expression must come together.
        if routine_return_type is not None:
            return_type = self.visitLang_type(routine_return_type)
            if return_expression is None:
                raise Exception("Routine must have a return statement")
        else:
            return_type = None
            if return_expression is not None:
                raise Exception("Routine has no return type")

        # Register before visiting the body, in the enclosing scope.
        self.current_symbol_table.parent_scope.add_routine(
            identifier, parameters_list, return_type)

        if body is not None:
            self.visitBody(body)

        if return_expression is not None:
            expr_type = self.visitExpression(return_expression)
            if return_type != expr_type:
                raise Exception("Return type must be {}".format(
                    TypeTable.get_type_name(return_type)))

        self.current_symbol_table = self.current_symbol_table.parent_scope

    # Visit a parse tree produced by HelloParser#parameters.
    def visitParameters(self, ctx):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#parameterDeclaration.
    def visitParameterDeclaration(self, ctx):
        """Register a parameter in the current scope; return ``(name, type)``.

        Raises:
            Exception: when the name is already defined in the current scope.
        """
        identifier = self.unicode_to_str(ctx.children[0].getText())
        lang_type = self.visitLang_type(ctx)
        if self.current_symbol_table.is_defined_in_current_scope(identifier):
            raise Exception(
                'Parameter with name {} is already defined'.format(identifier))
        self.current_symbol_table.add_variable(identifier, lang_type)
        return identifier, lang_type

    # Visit a parse tree produced by HelloParser#body.
    def visitBody(self, ctx):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#expression.
def visitExpression(self, ctx): # getting context children children = ctx.children # if one child get and return type if len(children) <= 1: expression_type = self.visitRelation(children[0]) if expression_type is None: raise Exception( "Attempt to call a routine, which doesn't return anything") return expression_type # if both relations are present get their types left_type = self.visitRelation(children[0]) right_type = self.visitRelation(children[2]) # check if both are boolean if left_type != PrimitiveType.boolean or right_type != PrimitiveType.boolean: raise Exception( 'Incompatible types {} and {} in expression, can be applied to boolean only' .format(TypeTable.get_type_name(left_type), TypeTable.get_type_name(right_type))) # return expression type expression_type = PrimitiveType.boolean return expression_type # Visit a parse tree produced by HelloParser#relation. def visitRelation(self, ctx): # getting context children children = ctx.children # if one child get and return type if len(children) <= 1: return self.visitSimple(children[0]) # if both relations are present check their type compatibility left_type = self.visitSimple(children[0]) right_type = self.visitSimple(children[2]) TypeUtils.deduce_type_comparable(left_type, right_type) return PrimitiveType.boolean # Visit a parse tree produced by HelloParser#simple. 
def visitSimple(self, ctx): # getting context children children = ctx.children simple_type = self.visitFactor(children[0]) # if one child get and return type if len(children) <= 1: return simple_type # get operator operator = children[1] # if both relations are present check their type compatibility according to the operator if operator is not None: left = children[0] right = children[2] operator_text = self.unicode_to_str(operator.getText()) if operator_text == '*': simple_type = TypeUtils.deduce_type(self.visitFactor(left), self.visitFactor(right)) elif operator_text == '/': simple_type = TypeUtils.deduce_type_division( self.visitFactor(left), self.visitFactor(right)) elif operator_text == '%': simple_type = TypeUtils.deduce_type_module( self.visitFactor(left), self.visitFactor(right)) return simple_type # Visit a parse tree produced by HelloParser#factor. def visitFactor(self, ctx): # getting context children children = ctx.children factor_type = self.visitSummand(children[0]) # if one child get and return type if len(children) <= 1: return factor_type # if both relations are present check their type compatibility if len(children) > 1: left = children[0] right = children[2] factor_type = TypeUtils.deduce_type(self.visitSummand(left), self.visitSummand(right)) return factor_type # Visit a parse tree produced by HelloParser#summand. def visitSummand(self, ctx): # getting context children children = ctx.children summand_type = self.visitChildren(ctx) # if summand is an expression if len(children) == 3: return self.visitExpression(children[1]) return summand_type # Visit a parse tree produced by HelloParser#primary. 
def visitPrimary(self, ctx): # getting context children children = ctx.children child_type = self.visitChildren(ctx) int_lit = ctx.IntegerLiteral() real_lit = ctx.RealLiteral() routine_call = ctx.routineCall() # deduce primary type if routine_call is not None: # if primary is routine call type_id = self.visitRoutineCall(routine_call) elif int_lit is not None: # if primary is integer type_id = PrimitiveType.integer elif real_lit is not None: # if primary is real type_id = PrimitiveType.real elif self.unicode_to_str( children[0].getText()) == 'true' or self.unicode_to_str( children[0].getText()) == 'false': # if primary is boolean type_id = PrimitiveType.boolean else: # if primary is modifiable primary type_id = child_type return type_id # Visit a parse tree produced by HelloParser#modifiablePrimary. def visitModifiablePrimary(self, ctx): # self.visitChildren(ctx) # getting context children children = ctx.children record_calls = [] # if modifiable primary is a variable name if len(children) == 1: identifier = self.unicode_to_str(children[0].getText()) # check if variable was declared if not self.current_symbol_table.is_defined_in_scope(identifier): raise Exception( 'Variable {} is not defined'.format(identifier)) # return type of the variable from the symbol table return self.current_symbol_table.get_variable_info( identifier).variable_type # if modifiable primary is a array identifier elif type(children[2]) is HelloParser.ExpressionContext: array_identifier = self.unicode_to_str(children[0].getText()) # check if array was declared if not self.current_symbol_table.is_defined_in_scope( array_identifier): raise Exception('Array with name {} is not defined'.format( array_identifier)) # return type of the array from the symbol table return TypeTable.get_type( self.current_symbol_table.get_variable_info( array_identifier).variable_type).nested_type_id # if modifiable primary is a record field access else: # append identifiers of records and their fields to a list for i in 
range(len(children)): if i % 2 == 0: identifier = self.unicode_to_str(children[i].getText()) # check that the first identifier is a declared variable with type record if i == 0 and not self.current_symbol_table.is_defined_in_scope( identifier): raise Exception( 'Record with name {} is not defined'.format( identifier)) record_calls.append(identifier) # check validity of field calls type_id = self.current_symbol_table.get_variable_info( record_calls[0]).variable_type current_type = self.type_table.table[type_id] for i in range(len(record_calls) - 1): if record_calls[ i + 1] not in current_type.inner_declarations.keys(): raise Exception("Record {} doesn't have a field {}".format( record_calls[i], record_calls[i + 1])) type_id = current_type.inner_declarations[record_calls[i + 1]] current_type = self.type_table.table[type_id] return type_id # Visit a parse tree produced by HelloParser#eos. def visitEos(self, ctx): return self.visitChildren(ctx)
def _initial(self):
    """Reset the per-run state: a fresh symbol table and the address counter."""
    fresh_symbols = SymbolTable()
    self.symbols = fresh_symbols
    # next address to hand out starts at 16
    # NOTE(review): presumably the first free variable slot -- confirm
    self.next_addr = 16
def __init__(self):
    """Create the root symbol table and clear the current declaration type."""
    root_scope = SymbolTable(None, "root")
    self.table = root_scope
    self.actType = ""
class F100Asm():
    """Two-pass assembler driver.

    Holds the symbol table (``st``), the current program counter (``pc``)
    and one instance of every known opcode class, and turns source lines
    into a mapping ``{address: [words]}``.
    """

    def __init__(self):
        self.st = SymbolTable()
        self.pc = 0
        self.opcodes = [
            o()
            for o in (OpcodeF0_Jump, OpcodeF0_Shift, OpcodeF0_Halt,
                      OpcodeF0_Bit, OpcodeF1, OpcodeF2, OpcodeF3, OpcodeF4,
                      OpcodeF5, OpcodeF6, OpcodeF7, OpcodeF8, OpcodeF9,
                      OpcodeF10, OpcodeF11, OpcodeF12, OpcodeF13, OpcodeF15)
        ]

    def is_valid_opcode(self, opcode_str):
        """Return a new instance of the opcode class matching *opcode_str*.

        Returns ``None`` when no registered opcode pattern matches.
        """
        for o in self.opcodes:
            if o.opcode_regexp.match(opcode_str):
                return o.__class__()
        return None

    def line_assemble(self, opcode, operands, symbol_table,
                      suppress_errors=False):
        """Assemble one instruction.

        Returns ``(assembled_words, warnings)`` as produced by the opcode's
        ``assemble`` method.
        Raises ``UserWarning`` when the opcode is not recognised.
        """
        op = self.is_valid_opcode(opcode)
        if op is None:
            raise UserWarning("Unrecognized opcode %s" % opcode)
        (assembled_words, warnings) = op.assemble(opcode, operands,
                                                  symbol_table,
                                                  suppress_errors)
        return (assembled_words, warnings)

    def twopass_assemble(self, text, listingon=True):
        """Run both assembly passes over *text*; return the pass-two result.

        Both passes start from the same program counter: without the
        rewind, pass two would begin where pass one ended, and every label
        in a source that does not open with ``.ORG`` would be re-defined at
        a shifted address. *text* is materialised once so that one-shot
        iterables (e.g. an open file) are still readable on pass two.
        """
        lines = list(text)
        start_pc = self.pc
        assembled_words = dict()
        for i in range(0, 2):
            self.pc = start_pc
            assembled_words = self.assemble(lines, i, listingon)
        return assembled_words

    def assemble(self, text, pass_number, listingon=True):
        """Build the text into lines of tokens and expressions.

        *pass_number* 0 collects labels and tolerates unresolved symbols;
        any later pass reports errors, prints the listing (when
        *listingon*) and the summary, and records the assembled words.

        Returns a dict mapping each line's start address to its words
        (empty on pass 0).
        Raises ``UserWarning`` after the summary when errors were counted,
        and re-raises a directive ``ValueError`` on pass > 0.
        """
        error_count = 0
        warning_count = 0
        lineno = 1
        label_list = dict()
        assembled_words = dict()

        if pass_number > 0:
            print(header_text)

        for textline in text:
            warnings = []
            line_pc = self.pc
            line_words = []
            line = textline.strip()

            ## Strip out comments from the end of the line
            comment_start = line.find(";")
            if comment_start > -1:
                line = line[:comment_start].strip()

            ## Strip out label from the start of the line
            label_match = re.match("([a-zA-Z_][a-zA-Z0-9_]*):", line)
            if label_match:
                line_label = str.upper(label_match.group(1))
                if line_label in label_list:
                    error_count += 1
                    print(
                        "Error, label %s on line %d has already been defined"
                        % (line_label, lineno))
                else:
                    label_list[line_label] = True
                    self.st[line_label] = str(self.pc)
                # drop the label and its trailing colon
                line = line[len(line_label) + 1:].strip()

            ## Left now with a line which is either blank, directive or opcode
            if line != "":
                fields = [str.upper(f) for f in line.split()]
                t = fields[0]
                if directive_re.match(t):
                    try:
                        # directives return the already advanced pc
                        (self.pc, line_words) = self.process_directive(
                            t, fields[1:])
                    except ValueError as v:
                        # Undefined symbols are expected on the first pass
                        if pass_number >= 1:
                            error_count += 1
                            raise v
                    except SyntaxError as s:
                        error_count += 1
                        print("Syntax Error on line %d: %s" % (lineno, s))
                else:
                    try:
                        (line_words, warnings) = self.line_assemble(
                            t,
                            fields[1:],
                            self.st,
                            suppress_errors=True if pass_number < 1 else False)
                    except ValueError as v:
                        if pass_number > 0:
                            # Ignore undefined symbols on first pass
                            error_count += 1
                            print(v)
                    except (TypeError, UserWarning, SyntaxError) as e:
                        error_count += 1
                        if pass_number > 0:
                            print("Error on line %d" % lineno)
                            print(e)
                    if len(warnings) > 0:
                        for w in warnings:
                            if pass_number > 0:
                                print(w)
                        warning_count += len(warnings)
                    self.pc += len(line_words)

            if pass_number > 0 and listingon:
                ## Simple listing code
                fields = []
                fields.append(" %5d:" % lineno)
                if len(line_words) > 0:
                    if len(line_words) > 1:
                        fields.append(" %04X: %04X %04X " %
                                      (line_pc, line_words[0] & 0xFFFF,
                                       line_words[1] & 0xFFFF))
                    else:
                        fields.append(" %04X: %04X " %
                                      (line_pc, line_words[0] & 0xFFFF))
                else:
                    fields.append(' ' * 21)
                fields.append(textline.strip())
                print(' '.join(fields))
                for d in line_words[2:]:
                    # NOTE(review): the trailing comma is kept from the
                    # original; under a Python 2 print statement it
                    # suppresses the newline, under Python 3 it is a no-op.
                    print(" %04X " % (d & 0xFFFF)),

            lineno += 1
            if pass_number > 0:
                assembled_words[line_pc] = line_words

        if pass_number > 0:
            print(line_sep)
            print("# %d Error%s" %
                  (error_count, '' if error_count == 1 else 's'))
            print("# %d Warning%s" %
                  (warning_count, '' if warning_count == 1 else 's'))
            print(line_sep)
            print("# SymbolTable")
            for s in self.st.tostring().split('\n'):
                print("# %s" % s)
            if error_count > 0:
                raise UserWarning("Assembly finished with errors")

        return assembled_words

    def process_directive(self, directive, operands):
        """Handle an ``.EQU``, ``.ORG``, ``.DATA`` or ``.WORD`` directive.

        Returns ``(new_pc, words)``: the program counter after the
        directive and the data words it emitted (empty unless the
        directive is ``.DATA``/``.WORD``).
        """
        new_pc = self.pc
        words = []
        if directive == ".EQU":
            self.st[operands[0]] = ''.join(operands[1:])
        if directive == ".ORG":
            new_pc = self.st.eval_expr(''.join(operands[0:]))
        elif directive == ".DATA" or directive == ".WORD":
            ## Need to resplit by commas rather than spaces
            data_words = (''.join(operands).split(','))
            new_pc += len(data_words)
            for d in data_words:
                ## Cant eval all expressions on first pass so return dummy data
                try:
                    words.append(self.st.eval_expr(d))
                except ValueError:
                    words.append(0xFF)
        return (new_pc, words)
class TypeChecker(NodeVisitor):
    """AST visitor that reports type and scope errors by printing them.

    ``self.table`` is the current scope, ``self.actType`` the type of the
    declaration being processed and ``self.actFunc`` the function whose
    body is being visited (``None`` outside of any function).

    Diagnostics use the single-argument ``print(...)`` form, which behaves
    the same under the Python 2 print statement and the Python 3 function.
    """

    def __init__(self):
        self.table = SymbolTable(None, "root")
        self.actType = ""
        # Must exist before the first function is visited:
        # visit_ReturnInstruction reads it to detect a top-level return.
        self.actFunc = None

    def visit_NoneType(self, node):
        return 'None'

    def visit_Integer(self, node):
        return 'int'

    def visit_Float(self, node):
        return 'float'

    def visit_String(self, node):
        return 'string'

    def visit_Variable(self, node):
        """Return the declared type of a variable (None when undefined)."""
        definition = self.table.getGlobal(node.name)
        if definition is None:
            print("Undefined symbol {} in line {}".format(
                node.name, node.line))
        else:
            return definition.type

    def visit_BinExpr(self, node):
        """Return the result type of a binary expression (None when invalid)."""
        left = self.visit(node.left)
        right = self.visit(node.right)
        op = node.op
        result_type = ttype(op, left, right)
        if result_type is None:
            print("Bad expression {} in line {}".format(node.op, node.line))
        return result_type

    def visit_AssignmentInstruction(self, node):
        """Check that the assigned expression fits the target's type.

        The only accepted mismatch is widening an int into a float target,
        the same rule visit_ReturnInstruction applies to return values.
        """
        definition = self.table.getGlobal(node.id)
        expr_type = self.visit(node.expr)
        if definition is None:
            print("Used undefined symbol {} in line {}".format(
                node.id, node.line))
        elif expr_type != definition.type and (definition.type != "float"
                                               or expr_type != "int"):
            print("Bad assignment of {} to {} in line {}.".format(
                expr_type, definition.type, node.line))

    def visit_GroupedExpression(self, node):
        return self.visit(node.interior)

    def visit_FunctionExpression(self, node):
        """Declare a function and check its arguments and body in its scope."""
        if self.table.get(node.name):
            print("Function {} already defined. Line: {}".format(
                node.name, node.line))
        else:
            function = FunctionSymbol(node.name, node.retType,
                                      SymbolTable(self.table, node.name))
            self.table.put(node.name, function)
            self.actFunc = function
            self.table = self.actFunc.table
            if node.args is not None:
                self.visit(node.args)
            self.visit(node.body)
            self.table = self.table.getParentScope()
            self.actFunc = None

    def visit_CompoundInstruction(self, node):
        """Visit declarations and instructions inside a new inner scope."""
        innerScope = SymbolTable(self.table, "innerScope")
        self.table = innerScope
        if node.declarations is not None:
            self.visit(node.declarations)
        self.visit(node.instructions)
        self.table = self.table.getParentScope()

    def visit_ArgumentList(self, node):
        for arg in node.children:
            self.visit(arg)
        self.actFunc.extractParams()

    def visit_Argument(self, node):
        if self.table.get(node.name) is not None:
            print("Argument {} already defined. Line: {}".format(
                node.name, node.line))
        else:
            self.table.put(node.name, VariableSymbol(node.name, node.type))

    def visit_InvocationExpression(self, node):
        """Check a call against the callee's parameter types.

        Returns the callee's type, or None when the callee is unknown.
        """
        funDef = self.table.getGlobal(node.name)
        if funDef is None or not isinstance(funDef, FunctionSymbol):
            print("Function {} not defined. Line: {}".format(
                node.name, node.line))
        else:
            if node.args is None and funDef.params != []:
                print("Invalid number of arguments in line {}. Expected {}".
                      format(node.line, len(funDef.params)))
            else:
                # a call without arguments to a parameterless function has
                # nothing to check (node.args is None in that case)
                arg_nodes = [] if node.args is None else node.args.children
                actual_types = [self.visit(x) for x in arg_nodes]
                expectedTypes = funDef.params
                for actual, expected in zip(actual_types, expectedTypes):
                    if actual != expected and not (actual == "int"
                                                   and expected == "float"):
                        print("Mismatching argument types in line {}. Expected {}, got {}".
                              format(node.line, expected, actual))
            return funDef.type

    def visit_ChoiceInstruction(self, node):
        self.visit(node.condition)
        self.visit(node.action)
        if node.alternateAction is not None:
            self.visit(node.alternateAction)

    def visit_WhileInstruction(self, node):
        self.visit(node.condition)
        self.visit(node.instruction)

    def visit_RepeatInstruction(self, node):
        self.visit(node.condition)
        self.visit(node.instructions)

    def visit_ReturnInstruction(self, node):
        """Check a return statement against the enclosing function's type."""
        if self.actFunc is None:
            print("Return placed outside of a function in line {}".format(
                node.line))
        else:
            returned_type = self.visit(node.expression)
            if returned_type != self.actFunc.type and (
                    self.actFunc.type != "float" or returned_type != "int"):
                print("Invalid return type of {} in line {}. Expected {}".format(
                    returned_type, node.line, self.actFunc.type))

    def visit_Init(self, node):
        """Declare one initialised variable of the current declaration type."""
        initType = self.visit(node.expr)
        if initType == self.actType or (initType == "int"
                                        and self.actType == "float") or (
                                            initType == "float"
                                            and self.actType == "int"):
            if self.table.get(node.name) is not None:
                print("Invalid definition of {} in line: {}. Entity redefined".
                      format(node.name, node.line))
            else:
                self.table.put(node.name,
                               VariableSymbol(node.name, self.actType))
        else:
            print("Bad assignment of {} to {} in line {}".format(
                initType, self.actType, node.line))

    def visit_Declaration(self, node):
        # actType tells visit_Init which type the names are declared with
        self.actType = node.type
        self.visit(node.inits)
        self.actType = ""

    def visit_PrintInstruction(self, node):
        self.visit(node.expr)

    def visit_LabeledInstruction(self, node):
        self.visit(node.instr)

    def visit_Program(self, node):
        print("Visiting program")
        self.visit(node.declarations)
        self.visit(node.fundefs)
        self.visit(node.instructions)
def visit_Instructions(self, node):
    """Visit every instruction inside a scope named 'instructions'.

    The new scope is a child of the scope that is current on entry; the
    parent of the current scope is restored once all elements are visited.
    """
    # print "visiting Instructions"
    enclosing_scope = self.symbolTable
    self.symbolTable = SymbolTable(enclosing_scope, 'instructions')
    for instruction in node.list:
        instruction.accept(self)
    self.symbolTable = self.symbolTable.getParentScope()
from SymbolTable import SymbolTable

if __name__ == "__main__":
    # Manual smoke test: fill a symbol table with a handful of names.
    _symbolTable = SymbolTable()
    for _name in ("N1", "N2", "AB", "BC", "N3", "A3"):
        _symbolTable.add(_name)

    # Disabled lookup checks (kept for reference):
    #
    # result = _symbolTable.search("N1")
    # if result is None:
    #     print("None")
    # else:
    #     print(result)
    #
    # result = _symbolTable.search("N4")
    # if result is None:
    #     print("None")
    # else:
    #     print(result)
    #
    # result = _symbolTable.search("15")
    # if result is None:
    #     print("None")
    # else:
    #     print(result)
class CompilationEngine:
    """Recursive-descent compiler that turns a token list into VM code.

    Tokens are ``(type, value)`` pairs. The engine keeps a cursor into the
    token list, a class-level and a subroutine-level symbol table, and
    emits code through a ``VMWriter``.
    """

    # source operator -> VM arithmetic command ("*" and "/" are emitted as
    # calls to Math.multiply / Math.divide, see compile_operation)
    all_operators = {
        "+": "add",
        "-": "sub",
        "/": "div",
        "*": "mul",
        "&": "and",
        "|": "or",
        ">": "gt",
        "<": "lt",
        "=": "eq"
    }

    def __init__(self, tokens, out_file):
        """
        initializing a new compile engine object and compiling the class
        :param tokens: the list of tokens created by the tokenizer
        :param out_file: the output file.
        """
        self.__tokens = tokens
        self.__file = out_file
        self.__i = 0
        self.__class_symbol = SymbolTable()
        self.__subroutine_symbol = SymbolTable()
        self.__cur_token = ()
        self.__class_name = ""
        self.__writer = VMWriter(out_file)
        self.__label_count = 0
        try:
            self.compile_class()
        finally:
            # close the writer even when compilation fails, so the output
            # is not left open; the error still propagates
            self.__writer.close()

    def eat(self):
        """
        compiling a single token and move to the next one
        """
        self.__cur_token = self.__tokens[self.__i]
        self.__i += 1

    def get_token(self):
        """
        :return: the value of the most recently eaten token
        """
        return self.__cur_token[1]

    def peek(self):
        """
        checking the current token without compiling
        :return: the token
        """
        return self.__tokens[self.__i][1]

    def peek_type(self):
        """
        checking the current token type without compiling
        :return: the token type
        """
        return self.__tokens[self.__i][0]

    def peek_ll2(self):
        """
        checking two tokens ahead without compiling
        :return: the token
        """
        return self.__tokens[self.__i + 1][1]

    def compile_while_stat(self):  # i points to while
        """
        compiling while statement
        """
        self.eat()  # while
        self.eat()  # (
        label_true = "L%s" % self.__label_count
        self.__label_count += 1
        label_continue = "L%s" % self.__label_count
        self.__label_count += 1
        self.__writer.write_label(label_true)
        self.compile_expression()
        # leave the loop when the condition is false
        self.__writer.write_arithmetic("not")
        self.__writer.write_if(label_continue)
        self.eat()  # )
        self.eat()  # {
        self.compile_statements()
        self.__writer.write_go_to(label_true)
        self.eat()  # }
        self.__writer.write_label(label_continue)

    def compile_return_stat(self):  # i points to return
        """
        compiling return statement
        """
        self.eat()
        if not self.peek() == ";":
            self.compile_expression()
        else:
            # a bare return still pushes a value: constant 0
            self.__writer.write_push("constant", 0)
        self.__writer.write_return()
        self.eat()

    def compile_do_stat(self):
        """
        compiling do statement
        """
        self.eat()
        self.compile_subroutine_call()
        # the call's return value is discarded
        self.__writer.write_pop("temp", 0)
        self.eat()

    def compile_if_stat(self):
        """
        compiling if statement
        """
        self.eat()  # if
        self.eat()  # (
        self.compile_expression()
        self.__writer.write_arithmetic("not")
        label_false = "L%s" % self.__label_count
        self.__label_count += 1
        label_continue = "L%s" % self.__label_count
        self.__label_count += 1
        self.__writer.write_if(label_false)
        self.eat()  # )
        self.eat()  # {
        self.compile_statements()
        self.__writer.write_go_to(label_continue)
        self.eat()  # }
        self.__writer.write_label(label_false)
        if self.peek() == "else":
            self.eat()  # else
            self.eat()  # {
            self.compile_statements()
            self.eat()  # }
        self.__writer.write_label(label_continue)

    def compile_class_var_dec(self):
        """
        compiling class variable declaration
        """
        self.eat()
        kind = self.get_token()
        if kind == "var":
            kind = SymbolTable.VAR
        self.var_dec_helper(kind, self.__class_symbol)

    def compile_var_dec(self):
        """
        compiling variable declaration
        """
        self.eat()
        self.var_dec_helper(SymbolTable.VAR, self.__subroutine_symbol)

    def var_dec_helper(self, kind, symbol_table):
        """
        adding every name of one declaration ("type a, b, c;") to a table
        :param kind: the kind recorded for each declared name
        :param symbol_table: the table receiving the names
        """
        self.eat()
        var_type = self.get_token()
        self.eat()
        name = self.get_token()
        symbol_table.add(name, var_type, kind)
        cur_stat = self.peek()
        while cur_stat != ";":
            self.eat()  # ,
            self.eat()  # next name
            name = self.get_token()
            symbol_table.add(name, var_type, kind)
            cur_stat = self.peek()
        self.eat()  # ;

    def compile_subroutine_body(self, func_name, func_type):
        """
        compiling subroutine body
        """
        self.eat()
        cur_stat = self.peek()
        while cur_stat == "var":
            self.compile_var_dec()
            cur_stat = self.peek()
        self.__writer.write_function(
            func_name, self.__subroutine_symbol.var_count(SymbolTable.VAR))
        self.__subroutine_symbol.add("this", self.__class_name, "pointer")
        if func_type == "method":
            # argument 0 of a method is the object; make it "this"
            self.__writer.write_push(SymbolTable.ARG, 0)
            self.__writer.write_pop("pointer", 0)
        elif func_type == "constructor":
            # allocate one word per field and make the block "this"
            self.__writer.write_push(
                "constant", self.__class_symbol.var_count(SymbolTable.FIELD))
            self.__writer.write_call("Memory.alloc", 1)
            self.__writer.write_pop("pointer", 0)
        self.compile_statements()
        self.eat()

    def compile_parameter_list(self):
        """
        compiling parameters list
        """
        cur_stat = self.peek()
        if cur_stat != ")":
            self.eat()
            param_type = self.get_token()
            self.eat()
            name = self.get_token()
            self.__subroutine_symbol.add(name, param_type, SymbolTable.ARG)
            cur_stat = self.peek()
        while cur_stat == ",":
            self.eat()  # ,
            self.eat()
            param_type = self.get_token()
            self.eat()
            name = self.get_token()
            self.__subroutine_symbol.add(name, param_type, SymbolTable.ARG)
            cur_stat = self.peek()

    def compile_class(self):
        """
        compiling class
        """
        self.eat()
        self.eat()
        self.__class_name = self.get_token()
        self.eat()
        cur_stat = self.peek()
        while cur_stat == "static" or cur_stat == "field":
            self.compile_class_var_dec()
            cur_stat = self.peek()
        while cur_stat != "}":
            self.compile_subroutine_dec()
            cur_stat = self.peek()
        self.eat()

    def compile_expression(self):
        """
        compiling expression
        """
        self.compile_term()
        cur_stat = self.peek()
        while cur_stat in CompilationEngine.all_operators:
            self.eat()
            self.compile_term()
            # the operator is emitted after both of its operands
            self.compile_operation(cur_stat)
            cur_stat = self.peek()

    def compile_operation(self, op):
        """
        compiling operation
        :param op: current op
        """
        if op == "*":
            self.__writer.write_call("Math.multiply", 2)
        elif op == "/":
            self.__writer.write_call("Math.divide", 2)
        else:
            self.__writer.write_arithmetic(CompilationEngine.all_operators[op])

    def compile_statements(self):
        """
        compiling statements
        """
        while self.compile_statement():
            continue

    def compile_subroutine_call(self):
        """
        compiling subroutine call
        """
        self.eat()
        name = self.get_token()
        cur_stat = self.peek()
        if cur_stat == "(":
            # unqualified call: a method of the current object
            self.eat()
            self.__writer.write_push("pointer", 0)
            args = self.compile_expression_list()
            self.eat()
            self.__writer.write_call(self.__class_name + "." + name, args + 1)
        else:
            self.eat()  # .
            val = self.find(name)
            self.eat()
            var_name = self.get_token()
            self.eat()  # (
            if not val:
                # name is not a known variable: call it as Name.subroutine
                args = 0
            else:
                # name is a variable: push it as the first argument and
                # call on the first element of its symbol-table entry
                self.__writer.push_val(val)
                name = val[0]
                args = 1
            args += self.compile_expression_list()
            self.__writer.write_call(name + "." + var_name, args)
            self.eat()  # )

    def compile_expression_list(self):
        """
        compiling expression list
        :return: the number of expressions compiled
        """
        args = 0
        cur_stat = self.peek()
        if cur_stat != ")":
            self.compile_expression()
            args += 1
            cur_stat = self.peek()
        while cur_stat == ",":
            self.eat()
            args += 1
            self.compile_expression()
            cur_stat = self.peek()
        return args

    def compile_statement(self):
        """
        compiling statement
        :return: 1 when a statement was compiled, 0 otherwise
        """
        cur_stat = self.peek()
        if cur_stat == "if":
            self.compile_if_stat()
        elif cur_stat == "while":
            self.compile_while_stat()
        elif cur_stat == "do":
            self.compile_do_stat()
        elif cur_stat == "return":
            self.compile_return_stat()
        elif cur_stat == "let":
            self.compile_let_stat()
        else:
            return 0  # when there is no more statements to compile
        return 1

    def compile_let_stat(self):
        """
        compiling let statement
        """
        self.eat()
        self.eat()
        name = self.get_token()
        data = self.find(name)
        kind = data[1]
        ind = data[2]
        if kind == "field":
            kind = "this"
        cur_stat = self.peek()
        if cur_stat == "[":
            self.compile_array(kind, ind)
        else:
            self.eat()  # =
            self.compile_expression()
            self.__writer.write_pop(kind, ind)
        self.eat()  # eat ;

    def compile_subroutine_dec(self):
        """
        compiling subroutine declaration
        """
        self.eat()
        func_type = self.get_token()
        self.eat()
        self.eat()
        func_name = self.__class_name + "." + self.get_token()
        self.eat()
        if func_type == "method":
            self.__subroutine_symbol.add("this", self.__class_name,
                                         SymbolTable.ARG)
        self.compile_parameter_list()
        self.eat()
        self.compile_subroutine_body(func_name, func_type)
        # every subroutine starts with an empty subroutine-level table
        self.__subroutine_symbol = SymbolTable()

    def compile_term(self):
        """
        compiling term
        """
        cur_stat = self.peek_type()
        if cur_stat == JackTokenizer.INT_CONST:
            self.__writer.write_push("constant", self.peek())
            self.eat()
            return
        if cur_stat == JackTokenizer.KEYWORD:
            if self.peek() == "null" or self.peek() == "false":
                self.__writer.write_push("constant", 0)
            elif self.peek() == "true":
                # true is emitted as "not 0"
                self.__writer.write_push("constant", 0)
                self.__writer.write_arithmetic("not")
            elif self.peek() == "this":
                self.__writer.write_push("pointer", 0)
            self.eat()
            return
        if cur_stat == JackTokenizer.STR_CONST:
            string1 = self.peek().replace('\t', "\\t")
            string2 = string1.replace('\n', "\\n")
            string3 = string2.replace('\r', "\\r")
            string = string3.replace('\b', "\\b")
            self.__writer.write_push("constant", len(string))
            self.__writer.write_call("String.new", 1)
            for ch in string:
                self.__writer.write_push("constant", ord(ch))
                self.__writer.write_call("String.appendChar", 2)
            self.eat()
            return
        cur_stat = self.peek()
        if cur_stat == "(":
            self.eat()
            self.compile_expression()
            self.eat()
            return
        if cur_stat == "-":
            self.eat()
            self.compile_term()
            self.__writer.write_arithmetic("neg")
            return
        if cur_stat == "~":
            self.eat()
            self.compile_term()
            self.__writer.write_arithmetic("not")
            return
        # look one token further to tell array access and calls apart
        # from a plain variable
        cur_stat = self.peek_ll2()
        if cur_stat == "[":
            self.eat()
            name = self.get_token()
            self.__writer.push_val(self.find(name))
            self.eat()
            self.compile_expression()
            self.__writer.write_arithmetic("add")
            self.__writer.write_pop("pointer", 1)
            self.__writer.write_push("that", 0)
            self.eat()
            return
        if cur_stat == "." or cur_stat == "(":
            self.compile_subroutine_call()
            return
        self.eat()  # varName
        name = self.get_token()
        self.__writer.push_val(self.find(name))
        return

    def find(self, name):
        """
        finding a variable name in symbol tables; the subroutine table is
        searched first, then the class table
        :param name: the variable name
        :return: the symbol-table data of the name, or False when the name
                 is in neither table
        """
        val = self.__subroutine_symbol.get_data(name)
        if not val:
            val = self.__class_symbol.get_data(name)
        if not val:
            return False
        return val

    def compile_array(self, kind, index):
        """
        compiling array assignment
        :param kind: var kind
        :param index: var index
        """
        self.eat()  # [
        self.compile_expression()
        self.eat()  # ]
        self.__writer.write_push(kind, index)
        self.__writer.write_arithmetic("add")
        self.eat()  # =
        self.compile_expression()
        # park the value in temp 0 while "that" is pointed at the element
        self.__writer.write_pop("temp", 0)
        self.__writer.write_pop("pointer", 1)
        self.__writer.write_push("temp", 0)
        self.__writer.write_pop("that", 0)
index = self.symbolTable.search(token) if index is None: self.symbolTable.add(token) index = self.symbolTable.search(token) self.pif.genPif(token, index) else: raise Exception( "There is a Lexical Error detected on line " + str(count) + " for token " + token) count += 1 ''' Tests ''' symTable = SymbolTable() pif = ProgramInternalForm() scanner = Scanner(symTable, pif) scanner.scan() print("symTable: ") print(symTable) print("pif: ") print(pif) symTable = SymbolTable() pif = ProgramInternalForm() scanner = Scanner(symTable, pif, "D:\\Facultate\\Sem 1\\FLCD\\STlab2\\p1er.txt") scanner.scan() print("symTable: ") print(symTable)
class TypeChecker(object):
    """AST visitor that type-checks a program and reports errors.

    Nodes dispatch to the ``visit_*`` methods through ``node.accept(self)``.
    Expression visitors return the type of the expression (one of the names
    in ``self.types``, or a symbol object taken from the symbol table).
    Problems are reported through :meth:`error`, which also sets
    ``self.errorsOcurred``.
    """

    def __init__(self):
        """Build the operator/operand type table used by every check."""
        self.errorsOcurred = False
        # class name -> symbol table holding that class's members
        self.classTables = {}
        operators = ['+', '-', '*', '/', '%', '|', '&', '^', '&&', '||',
                     '<<', '>>', '==', '!=', '<', '>', '<=', '>=', 'f']
        self.types = ['int', 'float', 'string']
        # ttype[operator][left type][right type] -> result type.
        # A missing entry means the combination is not allowed.
        self.ttype = {op: {t: {} for t in self.types} for op in operators}

        # arithmetic: any float operand makes the result float
        for op in ('+', '-', '*', '/'):
            table = self.ttype[op]
            table['int']['float'] = 'float'
            table['float']['int'] = 'float'
            table['float']['float'] = 'float'
            table['int']['int'] = 'int'
        # '+' is the only arithmetic operator defined on strings
        self.ttype['+']['string']['string'] = 'string'

        # modulo, bitwise, logical and shift operators: int only
        for op in ('%', '|', '&', '^', '&&', '||', '<<', '>>'):
            self.ttype[op]['int']['int'] = 'int'

        # comparisons always yield int
        for op in ('==', '!=', '<', '>', '<=', '>='):
            table = self.ttype[op]
            table['int']['int'] = 'int'
            table['int']['float'] = 'int'
            table['float']['int'] = 'int'
            table['float']['float'] = 'int'
            table['string']['string'] = 'int'

        # 'f' is the pseudo-operator used for assignment, initialisation
        # and argument passing: ttype['f'][target type][value type]
        assign = self.ttype['f']
        assign['string']['string'] = 'string'
        assign['int']['int'] = 'int'
        assign['float']['float'] = 'float'
        assign['float']['int'] = 'float'

    def error(self, text, line):
        """Print a diagnostic and remember that an error occurred.

        :param text: message to print
        :param line: source line the message refers to
        """
        self.errorsOcurred = True
        print("********************************")
        print("Error: " + text)
        print("Line " + str(line))
        exc_type, _exc_obj, exc_tb = sys.exc_info()
        # error() is also called outside of any ``except`` block, where
        # sys.exc_info() is (None, None, None); only print exception details
        # when there is an active exception.
        if exc_tb is not None:
            print(exc_type, exc_tb.tb_lineno)
        print("********************************")

    def visit_Program(self, node):
        """Check class definitions, declarations, functions and instructions
        inside a fresh 'main' scope."""
        try:
            self.symbolTable = SymbolTable(None, 'main')
            node.classdefs.accept(self)
            node.declarations.accept(self)
            node.fundefs.accept(self)
            node.instructions.accept(self)
        except Exception:
            self.error("could not continue parsing, correct errors first", 0)

    def visit_Declarations(self, node):
        """Visit every declaration in the list."""
        for element in node.list:
            element.accept(self)

    def visit_Declaration(self, node):
        """Register the declared symbols in the current scope.

        :return: list of the declared names
        """
        toReturn = []
        declType = node.typeOrId
        allInits = node.initsOrClassinits.accept(self)
        if declType in self.types:
            # built-in type: every init is an [expression type, id] pair
            for element in allInits:
                typeOrId, ident = element
                if self.symbolTable.get(ident.value) is not None:
                    self.error("Symbol: " + ident.value + ", was previusly declared", ident.line)
                try:
                    self.ttype['f'][declType][typeOrId]
                except Exception:
                    self.error("cannot initialize symbol of type: " + str(declType)
                               + ", with expression of type: " + str(typeOrId), ident.line)
                self.symbolTable.put(ident.value, typeOrId)
                toReturn.append(ident.value)
        else:
            # class type: every init is just an id
            typeOrId = declType.accept(self)
            for ident in allInits:
                if self.symbolTable.get(ident.value) is not None:
                    self.error("Symbol: " + ident.value + ", was previusly declared", ident.line)
                self.symbolTable.put(ident.value, typeOrId)
                # expose the members of the class and of each parent class
                # under a synthetic per-object name
                tmp = typeOrId
                while tmp is not None:
                    classTable = self.classTables[tmp.id]
                    for element in classTable.map:
                        self.symbolTable.put(
                            self.makeClassContentName(typeOrId.id, ident.value, element),
                            classTable.get(element))
                    tmp = tmp.parentClass
                toReturn.append(ident.value)
        return toReturn

    def makeClassContentName(self, className, objectName, fieldName):
        """Return the synthetic symbol name for a member of an object."""
        return "__" + className + objectName + fieldName

    def visit_Inits(self, node):
        """Return the visit result of every init in the list."""
        return [element.accept(self) for element in node.list]

    def visit_Init(self, node):
        """Return ``[expression type, id node]`` for one initialiser."""
        return [node.expression.accept(self), node.id]

    def visit_Classinits(self, node):
        """Return the visit result of every class init in the list."""
        return [element.accept(self) for element in node.list]

    def visit_Classinit(self, node):
        """Return the id node of the declared object."""
        return node.id

    def visit_Instructions(self, node):
        """Visit the instructions inside a nested 'instructions' scope."""
        self.symbolTable = SymbolTable(self.symbolTable, 'instructions')
        for element in node.list:
            element.accept(self)
        self.symbolTable = self.symbolTable.getParentScope()

    def visit_PrintInstr(self, node):
        """Only string, int and float expressions may be printed."""
        if node.expression.accept(self) not in ('string', 'int', 'float'):
            self.error("cannot print expression of that type", node.line)

    def visit_LabeledInstr(self, node):
        """Check the instruction behind the label."""
        node.instruction.accept(self)

    def visit_Assignment(self, node):
        """Check that the expression type may be assigned to the target."""
        # bound up front so the handler can always build its message
        idType = exprType = None
        try:
            idType = node.access.accept(self)
            exprType = node.expression.accept(self)
            self.ttype['f'][idType][exprType]
        except Exception:
            # NOTE(review): the line is read from node.id although the target
            # is visited through node.access - confirm the node has `id`.
            self.error("cannot assign " + str(exprType) + " to " + str(idType), node.id.line)

    def visit_ChoiceInstr(self, node):
        """Check the condition and both branches."""
        node.condition.accept(self)
        node.instruction.accept(self)
        node.elseInstruction.accept(self)

    def visit_Break(self, node):
        """Nothing to check."""
        pass

    def visit_Continue(self, node):
        """Nothing to check."""
        pass

    def visit_WhileInstr(self, node):
        """Check the condition, then the body inside a 'while' scope."""
        node.condition.accept(self)
        self.symbolTable = SymbolTable(self.symbolTable, 'while')
        node.instruction.accept(self)
        self.symbolTable = self.symbolTable.getParentScope()

    def visit_RepeatInstr(self, node):
        """Check the body first, then the condition."""
        node.instructions.accept(self)
        node.condition.accept(self)

    def visit_ReturnInstr(self, node):
        """Visit the returned expression."""
        node.expression.accept(self)
        # todo check somehow

    def visit_CompoundInstr(self, node):
        """Check declarations and instructions; no scope of its own is opened."""
        node.declarations.accept(self)
        node.instructions.accept(self)

    def visit_Condition(self, node):
        """A condition must be of type int."""
        # The previous test was `not in ('int')`: a substring test against
        # the string 'int', which accepted 'in' / 'i' / '' and raised
        # TypeError for non-string results.
        if node.expression.accept(self) != 'int':
            self.error("condition must be of int type", node.line)

    def visit_Integer(self, node):
        """Integer literal."""
        return 'int'

    def visit_Float(self, node):
        """Float literal."""
        return 'float'

    def visit_String(self, node):
        """String literal."""
        return 'string'

    def visit_Id(self, node):
        """Return the symbol-table entry for the id, searching parent scopes.

        Reports an error (and returns None) when no truthy entry is found.
        """
        found = self.symbolTable.getIncludingParents(node.value)
        if found:
            return found
        self.error("undefined symbol: " + node.value, node.line)

    def visit_ParExpr(self, node):
        """A parenthesised expression has the type of its content."""
        return node.expression.accept(self)

    def visit_BinExpr(self, node):
        """Return the result type of a binary operation.

        Reports an error (and returns None) when the operator is not defined
        for the operand types.
        """
        operator = node.operator
        first = node.first.accept(self)
        second = node.second.accept(self)
        try:
            return self.ttype[operator][first][second]
        except Exception:
            # operands may be None or symbol objects after an earlier error
            self.error("cannot compute operation: " + str(operator) + ",on arguments: "
                       + str(first) + ", " + str(second), node.first.line)

    def visit_FunExpr(self, node):
        """Check every call argument against the function's argument list.

        :return: the ``type`` attribute of the called function symbol
        """
        funSymbol = node.access.accept(self)
        for i, argument in enumerate(node.expressionList.list):
            try:
                baseArgType = funSymbol.argList[i]
                givenArgType = argument.accept(self)
                self.ttype['f'][baseArgType][givenArgType]
            except Exception:
                self.error("bad argument in funcall", node.line)
        return funSymbol.type

    def visit_ExprList(self, node):
        """Return the type of every expression in the list."""
        return [element.accept(self) for element in node.list]

    def visit_FunDefs(self, node):
        """Visit every function definition."""
        for element in node.list:
            element.accept(self)

    def visit_FunDef(self, node):
        """Register the function in the enclosing scope and check its body.

        :return: the function name
        """
        self.symbolTable = SymbolTable(self.symbolTable, node.id.value)
        # The arguments must be visited now, while the function scope is
        # current, and stored as a real list: a lazy map() would neither
        # register them before the body is checked nor support the indexing
        # done in visit_FunExpr.
        argTypes = [arg.accept(self) for arg in node.argList.list]
        self.symbolTable.getParentScope().put(
            node.id.value, FunSymbol(node.typeOrId, node.id.value, argTypes))
        node.compoundInstr.accept(self)
        self.symbolTable = self.symbolTable.getParentScope()
        return node.id.value

    def visit_ArgList(self, node):
        """Return the visit result of every argument in the list."""
        return [element.accept(self) for element in node.list]

    def visit_Arg(self, node):
        """Register the argument in the current scope and return its type."""
        self.symbolTable.put(node.id.value, node.typeOrId)
        return node.typeOrId

    def visit_ClassDefs(self, node):
        """Visit every class definition."""
        for element in node.list:
            element.accept(self)

    def visit_ClassDef(self, node):
        """Build the class's symbol table and register the class symbol in
        the outermost scope."""
        # a derived class is scoped under the table of its parent class
        enclosing = (self.symbolTable if node.parentId is None
                     else self.classTables[node.parentId.value])
        self.symbolTable = SymbolTable(enclosing, node.id.value)
        classSymbol = ClassSymbol(
            node.accessmodificator,
            node.id.value,
            None if node.parentId is None else node.parentId.accept(self),
            node.classcontent.accept(self))
        self.classTables[node.id.value] = self.symbolTable
        # climb back to the root scope before registering the class
        while self.symbolTable.parent is not None:
            self.symbolTable = self.symbolTable.getParentScope()
        self.symbolTable.put(node.id.value, classSymbol)

    def visit_Access(self, node):
        """Resolve ``name`` or ``object.member``.

        For a class object the member's own symbol is returned when it is
        visible and known; otherwise an error is reported and the object
        symbol itself is returned.
        """
        accessedObject = node.list[0].accept(self)
        if isinstance(accessedObject, ClassSymbol):
            if accessedObject.hasAccess(self.symbolTable.getFirstRelevantScopeName(),
                                        node.list[1].value):
                classContentName = self.makeClassContentName(
                    accessedObject.id, node.list[0].value, node.list[1].value)
                member = self.symbolTable.getIncludingParents(classContentName)
                if member:
                    accessedObject = member
                else:
                    self.error("cannot find class content " + classContentName, 0)
            else:
                self.error("trying to access field that is not visible", 0)
        return accessedObject

    def visit_Fielddefs(self, node):
        """Return the concatenated results of all field definitions."""
        toReturn = []
        for element in node.list:
            toReturn.extend(element.accept(self))
        return toReturn

    def visit_Fielddef(self, node):
        """Return ``(name, access modifier)`` for every declared field."""
        return [(element, node.accessmodificator)
                for element in node.declaration.accept(self)]

    def visit_Classcontent(self, node):
        """Return a dict mapping member name -> access modifier."""
        toReturn = {}
        for element in node.fielddefs.accept(self):
            toReturn[element[0]] = element[1]
        for element in node.methoddefs.accept(self):
            toReturn[element[0]] = element[1]
        return toReturn

    def visit_Methoddefs(self, node):
        """Return the visit result of every method definition."""
        return [element.accept(self) for element in node.list]

    def visit_Methoddef(self, node):
        """Return ``(method name, access modifier)``."""
        return (node.fundef.accept(self), node.accessmodificator)
def translate_Cinstr_to_bin(c_instr: str) -> str: a = '111' c = parser.get_comp_bin(c_instr=c_instr) d = parser.get_dest_bin(c_instr=c_instr) j = parser.get_jump_bin(c_instr=c_instr) c_instr_bin = f"{a}{c}{d}{j}" return c_instr_bin """ main.py drives the entire translation process of the assembler. """ parser = Parser(inFile="inFile.asm") symbolTable = SymbolTable() ############# # FIRST PHASE first_pass(parser.file) ############# # SECOND PASS outFile = open('outFile.hack', 'w') while parser.hasMoreCommands(): # Set 'next_cmd' to 'curr_cmd' curr_cmd = parser.advance() curr_cmdType = parser.commandType(curr_cmd) if curr_cmdType == 'C_COMMAND':