class Parser:
    r"""Parses a regular expression into an abstract syntax tree.

    The order of precedence for operators is as follows (highest first):

        Collation-related bracket symbols   [==] [::] [..]
        Escaped characters                  \
        Character set (bracket expression)  []
        Grouping                            ()
        Single-character-ERE duplication    * + ? {m,n}
        Concatenation
        Anchoring                           ^ $
        Alternation                         |
    """

    def __init__(self, regex):
        self.tokenizer = Tokenizer(regex)

    def parse(self):
        """Parse the whole pattern; raise ParseError on trailing input."""
        ast = self.parse_regex()
        if not self.tokenizer.atEnd():
            raise ParseError()
        return ast

    def parse_regex(self):
        '''regex: alternationExprn'''
        return self.parse_alternationExprn()

    def parse_concatExprn(self):
        '''concatExprn: duplicationExprn+'''
        children = []
        try:
            # Greedily consume duplication expressions until one fails
            # or input is exhausted.
            while not self.tokenizer.atEnd():
                dupExprn = self.parse_duplicationExprn()
                children.append(dupExprn)
        except ParseError:
            pass
        if len(children) == 1:
            return children[0]
        return ConcatNode(children)

    def parse_anchorExprn(self):
        '''anchorExprn: '^'? concatExprn? '$'? '''
        anchorStart, anchorEnd = False, False
        if not self.tokenizer.atEnd() and self.tokenizer.cur().isSpecial(
                ) and self.tokenizer.cur().value == '^':
            anchorStart = True
            self.tokenizer.advance()
        try:
            concatNode = self.parse_concatExprn()
        except ParseError:
            concatNode = None
        if not self.tokenizer.atEnd() and self.tokenizer.cur().isSpecial(
                ) and self.tokenizer.cur().value == '$':
            anchorEnd = True
            self.tokenizer.advance()
        # At least one of the three optional parts must be present.
        if not anchorStart and not anchorEnd and concatNode is None:
            raise ParseError()
        if not anchorStart and not anchorEnd:
            return concatNode
        return AnchorNode(anchorStart, anchorEnd, concatNode)

    def parse_alternationExprn(self):
        '''alternationExprn: anchorExprn ('|' anchorExprn)*'''
        left = self.parse_anchorExprn()
        while not self.tokenizer.atEnd() and self.tokenizer.cur().isSpecial(
                ) and self.tokenizer.cur().value == '|':
            self.tokenizer.advance()
            right = self.parse_anchorExprn()
            # Left-associative: a|b|c parses as (a|b)|c.
            left = AlternationNode(left, right)
        return left

    def parse_duplicationExprn(self):
        '''duplicationExprn: groupExprn ('*'|'+'|'?')? '''
        grpExprn = self.parse_groupExprn()
        if not self.tokenizer.atEnd() and self.tokenizer.cur().isSpecial(
                ) and self.tokenizer.cur().value in ('*', '+', '?'):
            op = self.tokenizer.advance()
            return DuplicationNode(op, grpExprn)
        return grpExprn

    def parse_groupExprn(self):
        '''groupExprn: CHAR | '(' regex ')' '''
        # BUG FIX: previously cur() was called without checking atEnd(),
        # so a truncated pattern such as "(ab" failed with an internal
        # tokenizer error instead of a ParseError.
        if self.tokenizer.atEnd():
            raise ParseError()
        if self.tokenizer.cur().isChar():
            return CharNode(self.tokenizer.advance())
        if self.tokenizer.cur().value == '(':
            self.tokenizer.advance()
            subRegex = self.parse_regex()
            # Same guard on the closing parenthesis check.
            if self.tokenizer.atEnd() or self.tokenizer.cur().value != ')':
                raise ParseError()
            self.tokenizer.advance()
            return subRegex
        raise ParseError()
class CompilationEngine:
    """Compiles a Jack source file into VM code.

    Drives a Tokenizer over the input, records class- and subroutine-scope
    variables in a SymbolTable, and emits VM commands through a VMWriter.
    Compilation runs to completion inside the constructor.
    """

    def __init__(self, inp_file, out_file):
        self.tokenizer = Tokenizer(inp_file)
        self.sym_tbl = SymbolTable()
        self.vm_writer = VMWriter(out_file)
        self.out_file = open(out_file, "a")
        self.current_token = ""
        self.current_token_type = ""
        self.curr_token_ptr = -1
        # Per-statement-kind counters used to generate unique VM labels.
        self.label_counter = {"if": 0, "while": 0}
        self.advance()
        self.compileClass()

    def appendToOutFile(self, content):
        """XML debug-output hook (currently disabled)."""
        # self.out_file.write(content)
        pass

    def xmlify(self, tag=None, content=None):
        """Format the current token as an XML element (debug aid).

        BUG FIX: the escape table previously mapped each special character
        to itself (the XML entities were lost); it must map to the entity
        so any emitted XML stays well-formed.
        """
        if tag == None:
            tag = self.current_token_type
        if content == None:
            content = self.current_token
        html_alternate = {"<": "&lt;", ">": "&gt;", '"': "&quot;", "&": "&amp;"}
        if content in ("<", ">", '"', "&"):
            content = html_alternate[content]
        # self.appendToOutFile(f"<{tag}> {content} </{tag}>\n")

    def compileClass(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        self.eat("class")
        self.class_name = self.current_token
        self.advance()
        self.eat("{")
        self.compileClassVarDec()
        self.compileSubroutineDec()
        self.eat("}")

    def compileClassVarDec(self):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'"""
        while self.current_token in ("field", "static"):
            var_kind = self.current_token
            if self.current_token == "field":
                # Fields live in the "this" VM segment.
                var_kind = "this"
            self.advance()
            var_type = self.current_token
            self.advance()
            var_name = self.current_token
            self.advance()
            self.sym_tbl.define(var_name, var_type, var_kind)
            while self.current_token == ",":
                self.eat(",")
                var_name_cont = self.current_token
                self.sym_tbl.define(var_name_cont, var_type, var_kind)
                self.advance()
            self.eat(";")

    def compileSubroutineDec(self):
        """subroutineDec: ('constructor'|'function'|'method') ('void'|type)
        subroutineName '(' parameterList ')' subroutineBody
        """
        while self.current_token in ("constructor", "function", "method"):
            subroutine = ""
            subroutine_name = ""
            if self.current_token == "constructor":
                # Constructors are assumed to be `constructor <Class> new`.
                self.eat("constructor")
                self.eat(self.class_name)
                self.eat("new")
                subroutine = "constructor"
                subroutine_name = "new"
            elif self.current_token in ("function", "method"):
                subroutine = self.current_token
                self.advance()
                subroutine_type = self.current_token
                self.advance()
                subroutine_name = self.current_token
                self.advance()
            self.eat("(")
            self.compileParamList(subroutine)
            self.eat(")")
            self.compileSubroutineBody(subroutine, subroutine_name)

    def compileParamList(self, subroutine):
        """parameterList: ((type varName) (',' type varName)*)?

        Also resets the subroutine symbol table; methods get an implicit
        `this` as argument 0.
        """
        self.sym_tbl.startSubroutine()
        if subroutine == "method":
            self.sym_tbl.define("this", self.class_name, "argument")
        if self.current_token != ")":
            param_type = self.current_token
            self.advance()
            param_name = self.current_token
            self.advance()
            self.sym_tbl.define(param_name, param_type, "argument")
            while self.current_token == ",":
                self.eat(",")
                param_type_cont = self.current_token
                self.advance()
                param_name_cont = self.current_token
                self.advance()
                self.sym_tbl.define(
                    param_name_cont, param_type_cont, "argument")

    def compileSubroutineBody(self, subroutine, subroutine_name):
        """subroutineBody: '{' varDec* statements '}'

        Emits the `function` declaration plus the standard constructor
        (allocate and anchor `this`) or method (re-anchor `this` from
        argument 0) preamble.
        """
        self.eat("{")
        while self.current_token == "var":
            self.compilevarDec()
        func_name = f"{self.class_name}.{subroutine_name}"
        print(func_name)
        num_locals = self.sym_tbl.varCount("local")
        self.vm_writer.writeFunction(func_name, num_locals)
        if subroutine == "constructor":
            fields = self.sym_tbl.varCount("this")
            self.vm_writer.writePushPop("push", "constant", fields)
            self.vm_writer.writeCall("Memory.alloc", 1)
            self.vm_writer.writePushPop("pop", "pointer", 0)
        elif subroutine == "method":
            self.vm_writer.writePushPop("push", "argument", 0)
            self.vm_writer.writePushPop("pop", "pointer", 0)
        self.compileStatements()
        self.eat("}")

    def compilevarDec(self):
        """varDec: 'var' type varName (',' varName)* ';'"""
        self.eat("var")
        var_type = self.current_token
        self.advance()
        var_name = self.current_token
        self.advance()
        self.sym_tbl.define(var_name, var_type, "local")
        while self.current_token == ",":
            self.eat(",")
            var_name_cont = self.current_token
            self.advance()
            self.sym_tbl.define(var_name_cont, var_type, "local")
        self.eat(";")

    def compileStatements(self):
        """statements: (let|if|while|do|return statement)*"""
        while self.current_token in ("let", "if", "while", "do", "return"):
            if self.current_token == "let":
                self.compileLet()
            elif self.current_token == "if":
                self.compileIf()
            elif self.current_token == "while":
                self.compileWhile()
            elif self.current_token == "do":
                self.compileDo()
            elif self.current_token == "return":
                self.compileReturn()

    def compileLet(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        self.eat("let")
        var_name = self.current_token
        (_type, kind, index) = self.sym_tbl.getVariable(var_name)
        self.advance()
        if self.current_token == "[":
            # Array assignment: compute target address, stash the RHS in
            # temp 0, then store through the `that` segment.
            self.eat("[")
            self.compileExpression()
            self.eat("]")
            self.vm_writer.writePushPop("push", kind, index)
            self.vm_writer.writeArithmetic("add")
            self.vm_writer.writePushPop("pop", "temp", 0)
            self.eat("=")
            self.compileExpression()
            self.vm_writer.writePushPop("push", "temp", 0)
            self.vm_writer.writePushPop("pop", "pointer", 1)
            self.vm_writer.writePushPop("pop", "that", 0)
        else:
            self.eat("=")
            self.compileExpression()
            self.vm_writer.writePushPop("pop", kind, index)
        self.eat(";")

    def compileIf(self):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?
        """
        self.eat("if")
        self.eat("(")
        self.compileExpression()
        self.eat(")")
        label_true = f"IF_TRUE{self.label_counter['if']}"
        label_false = f"IF_FALSE{self.label_counter['if']}"
        label_end = f"IF_END{self.label_counter['if']}"
        self.label_counter["if"] += 1
        self.vm_writer.writeIf(label_true)
        self.vm_writer.writeGoto(label_false)
        self.vm_writer.writeLabel(label_true)
        self.eat("{")
        self.compileStatements()
        self.vm_writer.writeGoto(label_end)
        self.eat("}")
        self.vm_writer.writeLabel(label_false)
        if self.current_token == "else":
            self.eat("else")
            self.eat("{")
            self.compileStatements()
            self.eat("}")
        self.vm_writer.writeLabel(label_end)

    def compileWhile(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        label_while = f"WHILE_EXP{self.label_counter['while']}"
        label_end = f"WHILE_END{self.label_counter['while']}"
        self.label_counter['while'] += 1
        self.eat("while")
        self.vm_writer.writeLabel(label_while)
        self.eat("(")
        self.compileExpression()
        # Negate the condition so a false test jumps out of the loop.
        self.vm_writer.writeArithmetic("not")
        self.vm_writer.writeIf(label_end)
        self.eat(")")
        self.eat("{")
        self.compileStatements()
        self.vm_writer.writeGoto(label_while)
        self.eat("}")
        self.vm_writer.writeLabel(label_end)

    def compileDo(self):
        """doStatement: 'do' subroutineCall ';' (return value discarded)"""
        self.eat("do")
        func_name = self.current_token
        self.advance()
        if self.current_token == ".":
            self.eat(".")
            name2 = self.current_token  # method_name
            func_name = f"{func_name}.{name2}"
            self.advance()
        self.handleSubroutineCall(func_name)
        # A do-statement ignores the callee's return value.
        self.vm_writer.writePushPop("pop", "temp", 0)
        self.eat(";")

    def compileReturn(self):
        """returnStatement: 'return' expression? ';'"""
        self.eat("return")
        if self.current_token != ";":
            self.compileExpression()
        else:
            # Void subroutines still push a dummy value per VM convention.
            self.vm_writer.writePushPop("push", "constant", 0)
        self.vm_writer.writeReturn()
        self.eat(";")

    def compileExpression(self):
        """expression: term (op term)*

        Emits postfix VM code as it goes and returns a textual rendering
        of the expression (used by compileTerm for call/array detection).
        """
        op_table = {
            '+': 'add',
            '-': 'sub',
            '&': 'and',
            '|': 'or',
            '<': 'lt',
            '>': 'gt',
            '=': 'eq'
        }
        exp = ""
        term = self.compileTerm()
        exp = exp + str(term)
        while self.current_token in ("+", "-", "*", "/", "&", "|", "<", ">", "="):
            op = self.current_token
            self.advance()
            term_cont = self.compileTerm()
            if op in op_table:
                self.vm_writer.writeArithmetic(op_table[op])
            elif op == "*":
                # Multiplication and division map to OS calls.
                self.vm_writer.writeCall("Math.multiply", 2)
            elif op == "/":
                self.vm_writer.writeCall("Math.divide", 2)
            else:
                raise SyntaxError("Invalid Operator")
            exp = exp + f" {op} {term_cont}"
        return exp

    def compileExpressionList(self):
        """expressionList: (expression (',' expression)*)?

        Returns the number of expressions compiled (call argument count).
        """
        args = 0
        if self.current_token != ")":
            self.compileExpression()
            args += 1
            while self.current_token == ",":
                self.eat(",")
                self.compileExpression()
                args += 1
        return args

    def compileTerm(self):
        """term: constant | varName ('[' expression ']')? | subroutineCall |
        '(' expression ')' | unaryOp term

        Returns the term's textual rendering; '[' / '(' markers in it drive
        array-access vs. subroutine-call code generation below.
        """
        full_term = ""
        if self.current_token_type in ("identifier", "stringConstant",
                                       "integerConstant") or self.current_token in ("true", "false", "null", "this"):
            term = self.current_token
            if self.current_token_type == "stringConstant":
                self.compileString(term)
            elif self.current_token_type == "integerConstant":
                self.vm_writer.writePushPop("push", "constant", term)
                print(f"push constant {term}")
            elif self.current_token in ("true", "false", "null"):
                # false/null are 0; true is ~0 (-1).
                self.vm_writer.writePushPop("push", "constant", 0)
                if self.current_token == "true":
                    self.vm_writer.writeArithmetic("not")
            elif self.current_token == "this":
                self.vm_writer.writePushPop("push", "pointer", 0)
            full_term = str(term)
            if self.current_token_type == "identifier":
                (_type, kind, index) = self.sym_tbl.getVariable(
                    self.current_token)
                args = 0
                self.advance()
                if self.current_token == "[":
                    self.eat("[")
                    exp = self.compileExpression()
                    self.eat("]")
                    full_term = full_term + f"[{exp}]"
                elif self.current_token == ".":
                    self.eat(".")
                    name2 = self.current_token  # method_name
                    full_term = full_term + f".{name2}"
                    self.advance()
                    if self.current_token == "(":
                        full_term = full_term + "()"
                if "[" in full_term:
                    # Array read: base + index, then push that 0.
                    self.vm_writer.writePushPop("push", kind, index)
                    self.vm_writer.writeArithmetic("add")
                    self.vm_writer.writePushPop("pop", "pointer", 1)
                    self.vm_writer.writePushPop("push", "that", 0)
                elif "(" in full_term:
                    self.handleSubroutineCall(full_term)
                else:
                    self.vm_writer.writePushPop("push", kind, index)
            else:
                self.advance()
        elif self.current_token == "(":
            self.eat("(")
            exp = self.compileExpression()
            self.eat(")")
            full_term = full_term + f"({exp})"
        elif self.current_token in ("-", "~"):
            uop = self.current_token
            self.advance()
            term_cont = self.compileTerm()
            if uop == "-":
                self.vm_writer.writeArithmetic("neg")
            else:
                self.vm_writer.writeArithmetic("not")
            full_term = full_term + f"{uop}{term_cont}"
        return full_term

    def handleSubroutineCall(self, func_name):
        """Compile `name(args)` / `obj.method(args)` / `Class.func(args)`.

        Pushes the receiver (if any), the argument list, then emits the
        call with the correct argument count.
        """
        args = 0
        if "(" in func_name:
            # Strip a trailing "()" marker left by compileTerm.
            func_name = func_name[0:-2]
        if "." not in func_name:
            # Bare call => method on the current object.
            func_name = f"{self.class_name}.{func_name}"
            args += 1
            self.vm_writer.writePushPop("push", "pointer", 0)
        if "." in func_name:
            c_name = func_name.split(".")[0]
            s_name = func_name.split(".")[1]
            (_type, kind, index) = self.sym_tbl.getVariable(c_name)
            if _type != None:
                # Prefix is a variable: it's a method call on that object.
                self.vm_writer.writePushPop("push", kind, index)
                func_name = f"{_type}.{s_name}"
                args += 1
        self.eat("(")
        args += self.compileExpressionList()
        self.eat(")")
        print(func_name, args)
        self.vm_writer.writeCall(func_name, args)

    def compileString(self, string):
        """Build a String object at runtime, appending one char at a time."""
        self.vm_writer.writePushPop("push", "constant", len(string))
        self.vm_writer.writeCall("String.new", 1)
        for char in string:
            self.vm_writer.writePushPop("push", "constant", ord(char))
            self.vm_writer.writeCall("String.appendChar", 2)

    def eat(self, string):
        """Consume the expected token or raise SyntaxError."""
        if self.current_token != string:
            raise SyntaxError(
                f"Expected {string} in place of {self.current_token}")
        self.advance()

    def advance(self):
        """Pull the next token from the tokenizer into current_token.

        BUG FIX: the entity-unescape table was garbled (entity strings had
        collapsed to bare characters, which is not even valid Python for
        the quote key); restored the XML entities the tokenizer may emit.
        Returns True if a token was read, False at end of input.
        """
        if self.tokenizer.advance():
            (token, token_type) = self.tokenizer.tokenWithType()
            html_alternate = {"&lt;": "<", "&gt;": ">",
                              "&quot;": '"', "&amp;": "&"}
            if token in ("&lt;", "&gt;", "&quot;", "&amp;"):
                token = html_alternate[token]
            self.current_token = token
            self.current_token_type = token_type
            self.curr_token_ptr += 1
            return True
        return False
class CompilerToXML(object):
    """Compiles a Jack source file into the project-10 XML parse tree.

    Walks the token stream from a Tokenizer and writes nested XML elements
    to the given output file object. Compilation starts in the constructor.

    BUG FIX applied throughout: token comparisons previously used `is`
    (identity) against string literals (e.g. `current_token is ','`,
    `current_token_type is 'KEYWORD'`); that only works by accident of
    CPython string interning. All such checks now use `==`.
    """

    def __init__(self, jack_file_object, output_file_object):
        self._tokenizer = Tokenizer(jack_file_object)
        self.out = output_file_object
        self.compile_class()

    def _advance_expect(self, target=None, target_type=None):
        """Advance the tokenizer, then test the NEW token.

        Exactly one of `target` (token text / list of texts) or
        `target_type` must be given. Returns True on a match; any other
        path returns a falsy value (None/False).
        """
        self._tokenizer.advance()
        if not target and target_type:
            if self._tokenizer.current_token_type in target_type:
                return True
        elif target and not target_type:
            if self._tokenizer.current_token in target:
                return True
        else:
            return False

    def _expect(self, target=None, target_type=None):
        """Test the CURRENT token without advancing (same contract as
        _advance_expect)."""
        if not target and target_type:
            if self._tokenizer.current_token_type in target_type:
                return True
        elif target and not target_type:
            if self._tokenizer.current_token in target:
                return True
        else:
            return False

    def compile_class(self):
        """class: 'class' className '{' classVarDec* subroutineDec* '}'"""
        try:
            print('in class')
            if self._advance_expect(target='class'):
                self.out.write('<class>\n')
                self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n')
            else:
                raise SyntaxError
            # Write className
            if self._advance_expect(target_type='IDENTIFIER'):
                self.out.write(f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
            else:
                raise SyntaxError
            if self._advance_expect(target='{'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
            else:
                raise SyntaxError
            self._tokenizer.advance()
            while self._tokenizer.current_token in ['static', 'field']:
                self.compile_class_var_dec()
                self._tokenizer.advance()
            while self._tokenizer.current_token in ['constructor', 'function', 'method']:
                self.compile_subroutine_dec()
                self._tokenizer.advance()
            if self._expect(target='}'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self.out.write('</class>\n')
            else:
                raise SyntaxError
            self._tokenizer.advance()
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in compile class')

    def compile_class_var_dec(self):
        """classVarDec: ('static'|'field') type varName (',' varName)* ';'"""
        try:
            print('in class var dec')
            if self._expect(target=['static', 'field']):
                self.out.write('<classVarDec>\n')
                self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n')
            else:
                raise SyntaxError
            # Compile type (built-in keyword or a class identifier)
            self._tokenizer.advance()
            if self._tokenizer.current_token_type in ['KEYWORD', 'IDENTIFIER']:
                if self._expect(target=['int', 'char', 'boolean', self._tokenizer.current_token]):
                    self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n'
                                   if self._tokenizer.current_token_type == 'KEYWORD'
                                   else f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
                else:
                    raise SyntaxError
            # Write first varName
            if self._advance_expect(target_type='IDENTIFIER'):
                self.out.write(f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
            else:
                raise SyntaxError
            self._tokenizer.advance()
            while self._tokenizer.current_token == ',':
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self._tokenizer.advance()
                if self._expect(target_type='IDENTIFIER'):
                    self.out.write(f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
                else:
                    raise SyntaxError
                self._tokenizer.advance()
            if self._expect(target=';'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self.out.write('</classVarDec>\n')
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in class var dec')

    def compile_subroutine_dec(self):
        """subroutineDec: ('constructor'|'function'|'method') ('void'|type)
        subroutineName '(' parameterList ')' subroutineBody"""
        try:
            print('in subroutine dec')
            if self._expect(target=['constructor', 'function', 'method']):
                self.out.write('<subroutineDec>\n')
                self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n')
            else:
                raise SyntaxError
            # Compile void|type
            self._tokenizer.advance()
            if self._tokenizer.current_token_type in ['KEYWORD', 'IDENTIFIER']:
                if self._expect(target=['int', 'char', 'boolean', 'void', self._tokenizer.current_token]):
                    self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n'
                                   if self._tokenizer.current_token_type == 'KEYWORD'
                                   else f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
                else:
                    raise SyntaxError
            # Write subroutine name
            if self._advance_expect(target_type='IDENTIFIER'):
                self.out.write(f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
            else:
                raise SyntaxError
            if self._advance_expect('('):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self.out.write('<parameterList>\n')
            else:
                raise SyntaxError
            self._tokenizer.advance()
            self.compile_parameter_list()
            if self._expect(')'):
                self.out.write('</parameterList>\n')
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
            else:
                raise SyntaxError
            self.compile_subroutine_body()
            self.out.write('</subroutineDec>\n')
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in compile_subroutine_dec')

    def compile_parameter_list(self):
        """parameterList: ((type varName) (',' type varName)*)? — handles
        repetition by recursing on itself after each comma."""
        try:
            print('in parameter list')
            if not self._expect(')'):
                # param is type: write
                if self._tokenizer.current_token_type in ['KEYWORD', 'IDENTIFIER']:
                    if self._expect(target=['int', 'char', 'boolean', self._tokenizer.current_token]):
                        self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n'
                                       if self._tokenizer.current_token_type == 'KEYWORD'
                                       else f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
                # compile varName
                if self._advance_expect(target_type='IDENTIFIER'):
                    self.out.write(f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
                else:
                    raise SyntaxError
                if self._advance_expect(target=','):
                    self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                    self._tokenizer.advance()
                    self.compile_parameter_list()
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in compile_param_list')

    def compile_subroutine_body(self):
        """subroutineBody: '{' varDec* statements '}'"""
        try:
            print('in subroutine body')
            if self._advance_expect('{'):
                self.out.write('<subroutineBody>\n')
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
            else:
                raise SyntaxError
            self._tokenizer.advance()
            while self._expect(target='var'):
                self.compile_var_dec()
                self._tokenizer.advance()
            # Shooting out next
            self.compile_statements()
            if self._expect('}'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self.out.write('</subroutineBody>\n')
            else:
                raise SyntaxError
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in subroutine body')

    def compile_var_dec(self):
        """varDec: 'var' type varName (',' varName)* ';'"""
        try:
            print('in var dec')
            if self._expect(target='var'):
                self.out.write('<varDec>\n')
                self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n')
            else:
                raise SyntaxError
            # Compile void|type
            self._tokenizer.advance()
            if self._tokenizer.current_token_type in ['KEYWORD', 'IDENTIFIER']:
                if self._expect(target=['int', 'char', 'boolean', 'void', self._tokenizer.current_token]):
                    self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n'
                                   if self._tokenizer.current_token_type == 'KEYWORD'
                                   else f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
                else:
                    raise SyntaxError
            # Write first varName
            if self._advance_expect(target_type='IDENTIFIER'):
                self.out.write(f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
            else:
                raise SyntaxError
            # following vars
            self._tokenizer.advance()
            while self._tokenizer.current_token == ',':
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self._tokenizer.advance()
                if self._expect(target_type='IDENTIFIER'):
                    self.out.write(f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
                else:
                    raise SyntaxError
                self._tokenizer.advance()
            if self._expect(';'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self.out.write('</varDec>\n')
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in compile var dec')

    def compile_statements(self):
        """statements: statement* — does not self-advance on entry, but each
        branch (except `if`, which advances itself) advances at the end."""
        try:
            print('in statements')
            self.out.write('<statements>\n')
            while self._expect(target=['do', 'let', 'while', 'return', 'if']):
                print(f'statements looping {self._tokenizer.current_token}')
                if self._expect('do'):
                    self.compile_do()
                    self._tokenizer.advance()
                elif self._expect('let'):
                    self.compile_let()
                    self._tokenizer.advance()
                elif self._expect('while'):
                    self.compile_while()
                    self._tokenizer.advance()
                elif self._expect('return'):
                    self.compile_return()
                    self._tokenizer.advance()
                elif self._expect('if'):
                    self.compile_if()
            self.out.write('</statements>\n')
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in statements')

    def compile_do(self):
        """doStatement: 'do' subroutineCall ';'"""
        try:
            print('in Do')
            self.out.write('<doStatement>\n')
            self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n')
            # Subroutine Call
            if self._advance_expect(target_type='IDENTIFIER'):
                self.out.write(f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
            else:
                raise SyntaxError
            self._tokenizer.advance()
            if self._expect('('):
                # subroutineName(expressionList)
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self.out.write('<expressionList>\n')
            elif self._expect(target='.'):
                # (className|varName).subroutineName(expressionList)
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                if self._advance_expect(target_type='IDENTIFIER'):
                    self.out.write(f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
                else:
                    raise SyntaxError
                if self._advance_expect(target='('):
                    self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                    self.out.write('<expressionList>\n')
                else:
                    raise SyntaxError
            self._tokenizer.advance()
            self.compile_expression_list()
            if self._expect(')'):
                self.out.write('</expressionList>\n')
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
            else:
                raise SyntaxError
            # End subroutine Call
            if self._advance_expect(';'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self.out.write('</doStatement>\n')
            else:
                raise SyntaxError
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in compile_do')

    def compile_let(self):
        """letStatement: 'let' varName ('[' expression ']')? '=' expression ';'"""
        try:
            print('in let')
            self.out.write('<letStatement>\n')
            self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n')
            # varName
            if self._advance_expect(target_type='IDENTIFIER'):
                self.out.write(f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
            else:
                raise SyntaxError
            # with or not '['
            if self._advance_expect('['):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self._tokenizer.advance()
                self.compile_expression()
                if self._expect(']'):
                    self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                    self._tokenizer.advance()
                else:
                    raise SyntaxError
            if self._expect('='):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
            else:
                raise SyntaxError
            self._tokenizer.advance()
            self.compile_expression()
            if self._expect(';'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self.out.write('</letStatement>\n')
            else:
                raise SyntaxError
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in compile_let')

    def compile_while(self):
        """whileStatement: 'while' '(' expression ')' '{' statements '}'"""
        try:
            print('in while')
            self.out.write('<whileStatement>\n')
            self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n')
            if self._advance_expect('('):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
            else:
                raise SyntaxError
            self._tokenizer.advance()
            self.compile_expression()
            if self._expect(')'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
            else:
                raise SyntaxError
            if self._advance_expect('{'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
            else:
                raise SyntaxError
            self._tokenizer.advance()
            self.compile_statements()
            if self._expect('}'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self.out.write('</whileStatement>\n')
            else:
                raise SyntaxError
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in compile_while')

    def compile_return(self):
        """returnStatement: 'return' expression? ';'"""
        try:
            print('in return')
            self.out.write('<returnStatement>\n')
            self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n')
            if not self._advance_expect(';'):
                self.compile_expression()
            if self._expect(';'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self.out.write('</returnStatement>\n')
            else:
                raise SyntaxError
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in compile_return')

    def compile_if(self):
        """ifStatement: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')? — advances past its own end."""
        try:
            print('in if')
            self.out.write('<ifStatement>\n')
            self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n')
            if self._advance_expect('('):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
            else:
                raise SyntaxError
            self._tokenizer.advance()
            self.compile_expression()
            if self._expect(')'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
            else:
                raise SyntaxError
            if self._advance_expect('{'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
            else:
                raise SyntaxError
            self._tokenizer.advance()
            self.compile_statements()
            if self._expect('}'):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
            else:
                raise SyntaxError
            if self._advance_expect(target='else'):
                self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n')
                if self._advance_expect('{'):
                    self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                else:
                    raise SyntaxError
                self._tokenizer.advance()
                self.compile_statements()
                if self._expect('}'):
                    self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                    self.out.write('</ifStatement>\n')
                    self._tokenizer.advance()
                else:
                    raise SyntaxError
            else:
                self.out.write('</ifStatement>\n')
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in compile_if')

    def compile_expression(self):
        """expression: term (op term)? — does not advance by itself.

        NOTE(review): only a single trailing (op term) pair is handled per
        call (an `if`, not a loop) — confirm longer chains are intended to
        be covered elsewhere.
        """
        print('in compile expression')
        self.out.write('<expression>\n')
        self.compile_term()  # shoots the next token
        if self._expect(target=list('+-*/&|<>=')):
            self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
            self._tokenizer.advance()
            self.compile_term()
        self.out.write('</expression>\n')

    def compile_term(self):
        """term: intConst | strConst | keywordConst | '(' expression ')' |
        unaryOp term | varName ('[' expression ']' | subroutineCall)? —
        leaves the tokenizer on the token AFTER the term."""
        try:
            print('in compile term')
            self.out.write('<term>\n')
            if self._expect(target_type='INT_CONST'):
                self.out.write(f'<integerConstant> {self._tokenizer.int_value()} </integerConstant>\n')
                self._tokenizer.advance()
            elif self._expect(target_type='STRING_CONST'):
                self.out.write(f'<stringConstant> {self._tokenizer.string_value()} </stringConstant>\n')
                self._tokenizer.advance()
                print(f'after string, current token is {self._tokenizer.current_token}')
            elif self._expect(target_type='KEYWORD'):
                self.out.write(f'<keyword> {self._tokenizer.keyword()} </keyword>\n')
                self._tokenizer.advance()
            # (expression)
            elif self._expect(target='('):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self._tokenizer.advance()
                self.compile_expression()
                if self._expect(')'):
                    self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                    self._tokenizer.advance()
            # unaryOP term
            elif self._expect(target=list('-~')):
                self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                self._tokenizer.advance()
                self.compile_term()
            # varName
            elif self._expect(target_type='IDENTIFIER'):
                print('term is identifier')
                self.out.write(f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
                self._tokenizer.advance()
                # varName [ expression ]
                if self._expect('['):
                    self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                    self._tokenizer.advance()
                    self.compile_expression()
                    if self._expect(']'):
                        self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                        self._tokenizer.advance()
                # subroutineCall
                elif self._expect(list('(.')):
                    if self._expect(target='('):
                        # subroutineName (expressionList)
                        self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                        self.out.write('<expressionList>\n')
                    elif self._expect(target='.'):
                        # (className|varName).subroutineName(expressionList)
                        self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                        if self._advance_expect(target_type='IDENTIFIER'):
                            self.out.write(f'<identifier> {self._tokenizer.identifier()} </identifier>\n')
                        else:
                            raise SyntaxError
                        if self._advance_expect(target='('):
                            self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                            self.out.write('<expressionList>\n')
                        else:
                            raise SyntaxError
                    self._tokenizer.advance()
                    self.compile_expression_list()
                    if self._expect(')'):
                        self.out.write('</expressionList>\n')
                        self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                        self._tokenizer.advance()
                    else:
                        raise SyntaxError
                    # End subroutine Call
            else:
                raise SyntaxError
            self.out.write('</term>\n')
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in term')

    # shoots next token
    def compile_expression_list(self):
        """expressionList: (expression (',' expression)*)? — recurses on
        itself after each comma; does not advance by itself."""
        try:
            if not self._expect(')'):
                print('in expression list')
                self.compile_expression()
                if self._expect(target=','):
                    self.out.write(f'<symbol> {self._tokenizer.symbol()} </symbol>\n')
                    self._tokenizer.advance()
                    self.compile_expression_list()
        except SyntaxError:
            print(f'Syntax error at {self._tokenizer.current_token} in compile_param_list')
# Driver script: tokenize every .jack file under DIRECTORY_NAME and emit a
# <tokens> XML listing per file under ./compiled/.
DIRECTORY_NAME = 'Square'
SRC_SUFFIX = 'jack'
TOKENIZED_SUFFIX = 'xml'
if __name__ == '__main__':
    READ_PATH = '{0}/'.format(DIRECTORY_NAME)
    # NOTE(review): relies on `listdir` (os.listdir) being imported
    # elsewhere in this file — confirm the import exists.
    files = [f for f in listdir(READ_PATH) if f.find('.jack') != -1]
    for f in files:
        COMPILE_PATH_1 = './compiled/{0}/{1}T.{2}'.format(
            DIRECTORY_NAME, f.split('.')[0], TOKENIZED_SUFFIX)
        # NOTE(review): READ_PATH already ends in '/', so this yields a
        # doubled slash ("Square//x.jack") — harmless on POSIX, but untidy.
        tokenizer = Tokenizer('{0}/{1}'.format(READ_PATH, f))
        # NOTE(review): file is opened in binary mode ('wb') but str is
        # written below, which raises TypeError at runtime — presumably
        # the mode should be 'w'.
        dest_file = open(COMPILE_PATH_1, 'wb')
        dest_file.write('<tokens>\n')
        tokenizer.advance()
        # NOTE(review): this loop never calls tokenizer.advance() again,
        # never writes next_line to dest_file, and reads has_more_tokens
        # as an attribute (not a call) — the block appears truncated or
        # unfinished; verify against the Tokenizer API before relying on it.
        while tokenizer.has_more_tokens:
            next_line = ' '
            token_type = tokenizer.token_type()
            if token_type == 'KEYWORD':
                next_line += '<keyword> {0} </keyword>'.format(
                    tokenizer.keyword())
            elif token_type == 'SYMBOL':
                next_line += '<symbol> {0} </symbol>'.format(
                    tokenizer.symbol())
            elif token_type == 'IDENTIFIER':
                next_line += '<identifier> {0} </identifier>'.format(
                    tokenizer.identifier())
            elif token_type == 'INT_CONST':
                next_line += '<integerConstant> {0} </integerConstant>'.format(
                    tokenizer.intVal())
class CompilationEngine:
    """Recursive-descent parser for the Jack language.

    Consumes tokens from a ``Tokenizer`` and builds an XML parse tree
    (``xml.etree.ElementTree`` elements) mirroring the Jack grammar:
    class -> classVarDec* subroutineDec* -> statements -> expressions -> terms.

    Conventions used throughout (inferred from the code below — the module
    helpers ``append_node`` and ``expect_assert`` are defined elsewhere):
      * Each ``compileX`` method is entered with the tokenizer positioned ON
        the first token of construct X, and returns an ET element for X.
      * ``expect_assert(expected, actual)`` validates the current token;
        ``self.t.back()`` undoes a one-token lookahead.
    """

    def __init__(self, path):
        # `path` is handed straight to Tokenizer — presumably a file path;
        # TODO(review): confirm against Tokenizer's constructor.
        self.t = Tokenizer(path)
        # Token-class tables used by the grammar dispatch below.
        self._class_var_types = ['static', 'field']
        self._types = ['int', 'char', 'boolean', 'void']
        self._subroutine_types = ['constructor', 'function', 'method']
        self._statement_types = ["let", "if", "else", "while", "do", "return"]
        self._ops = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        self._unaryOps = ['-', '~']
        # NOTE(review): attribute name is a typo for "_constants", and it is
        # never read within this class — keyword constants are handled
        # generically by the KEYWORD branch in compileTerm. Safe to rename or
        # drop only after checking external references.
        self._constatns = ['true', 'false', 'null', 'this']

    def new_node(self, key, value="\n"):
        """Create an ET element tagged `key` with text `value`.

        The "\n" defaults/tails exist purely to pretty-print the serialized
        XML (one token per line), matching the course's comparison files.
        """
        node = ET.Element(key)
        node.tail = "\n"
        node.text = value
        return node

    def compileClass(self):
        """Parse ``class className { classVarDec* subroutineDec* }``.

        Entry point: expects the tokenizer to be on the ``class`` keyword.
        Returns the root <class> element of the whole parse tree.
        """
        root = self.new_node('class')
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))  # 'class' keyword
        self.t.advance()
        # Class name identifier.
        class_name = self.t.token()
        append_node(root, self.new_node(self.t.tokenType().value, class_name))
        self.t.advance()
        expect_assert("{", self.t.token())
        # Opening brace.
        append_node(root, self.new_node(self.t.tokenType().value, self.t.token()))
        while True:
            self.t.advance()
            if self.t.token() == "}":
                break
            # Dispatch on the leading keyword of each class-level declaration.
            # NOTE(review): a token matching neither table is silently skipped.
            if self.t.token() in self._class_var_types:
                append_node(root, self.compileClassVarDec())
            elif self.t.token() in self._subroutine_types:
                append_node(root, self.compileSubroutine())
        expect_assert('}', self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))  # closing '}'
        return root

    def compileClassVarDec(self):
        """Parse ``('static'|'field') type varName (',' varName)* ';'``.

        Entered on the 'static'/'field' keyword; leaves the tokenizer on ';'.
        """
        root = self.new_node('classVarDec')
        expect_assert(self._class_var_types, self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        # Variable type (not validated against self._types here).
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        while True:
            self.t.advance()
            if self.t.token() == ';':
                append_node(
                    root, self.new_node(self.t.tokenType().value, self.t.token()))
                break
            elif self.t.token() == ',':
                append_node(
                    root, self.new_node(self.t.tokenType().value, self.t.token()))
                continue
            # Anything else in the list must be a variable name.
            expect_assert(TokenType.IDENTIFIER, self.t.tokenType())
            append_node(
                root, self.new_node(self.t.tokenType().value, self.t.token()))
        return root

    def compileSubroutine(self):
        """Parse a subroutine declaration.

        Grammar: ('constructor'|'function'|'method') ('void'|type)
        subroutineName '(' parameterList ')' subroutineBody.
        Entered on the constructor/function/method keyword.
        """
        root = self.new_node('subroutineDec')
        expect_assert(self._subroutine_types, self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))  # routine kind
        self.t.advance()
        # Return type ('void' or a type name; not validated here).
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        # Subroutine name.
        expect_assert(TokenType.IDENTIFIER, self.t.tokenType())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        # Opening '(' of the parameter list.
        expect_assert("(", self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        # compileParameterList advances internally and returns with the
        # tokenizer already on the closing ')'.
        append_node(root, self.compileParameterList())
        expect_assert(")", self.t.token())
        # Closing ')'.
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        # Subroutine body '{ varDec* statements }'.
        append_node(root, self.compileSubroutineBody())
        return root

    def compileSubroutineBody(self):
        """Parse ``'{' varDec* statements '}'``.

        Entered with the tokenizer still on ')'; the first advance() lands
        on '{'. Leaves the tokenizer on the closing '}'.
        """
        root = self.new_node("subroutineBody")
        self.t.advance()
        # Opening brace.
        expect_assert("{", self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        is_statements = False
        while True:
            self.t.advance()
            if self.t.token() == "}":
                break
            if self.t.token() == "var":
                append_node(root, self.compileVarDec())
                # NOTE(review): assert is used for input validation (stripped
                # under `python -O`) and fires only AFTER the out-of-order
                # varDec has already been appended — consider raising before
                # the append instead. Message also reads "must defined".
                assert not is_statements, "Var must defined before Statements"
            else:
                is_statements = True
                append_node(root, self.compileStatements())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))  # closing '}'
        return root

    def compileParameterList(self):
        """Parse ``((type varName) (',' type varName)*)?``.

        Appends every token verbatim until the closing ')' is seen; returns
        with the tokenizer ON that ')'. NOTE(review): types, commas and
        identifiers are not individually validated (the type check is
        commented out below).
        """
        root = self.new_node('parameterList')
        while True:
            self.t.advance()
            if self.t.token() == ")":
                return root
            #expect_assert(self._types, self.t.token())
            # Parameter-list token (type, name, or ',').
            append_node(
                root, self.new_node(self.t.tokenType().value, self.t.token()))

    def compileVarDec(self):
        """Parse ``'var' type varName (',' varName)* ';'``.

        Entered on the 'var' keyword; leaves the tokenizer on ';'.
        NOTE(review): unlike compileClassVarDec, names and commas are
        appended without IDENTIFIER validation.
        """
        root = self.new_node('varDec')
        expect_assert('var', self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        # Variable type.
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        while True:
            self.t.advance()
            if self.t.token() == ';':
                append_node(
                    root, self.new_node(self.t.tokenType().value, self.t.token()))
                break
            append_node(
                root, self.new_node(self.t.tokenType().value, self.t.token()))
        return root

    def compileStatements(self):
        """Parse ``statement*`` until the enclosing '}' is reached.

        Each compileX leaves the tokenizer on its final token (';' or '}'),
        so the trailing advance() steps to the next statement. On seeing '}'
        the lookahead is undone with back() so the caller sees it.
        """
        # letStatement | ifStatement | whileStatement | doStatement | returnStatement
        root = self.new_node('statements')
        while True:
            token = self.t.token()
            if self.t.token() == "}":
                self.t.back()
                return root
            #expect_assert(self._statement_types, token)
            # NOTE(review): an unrecognized token falls through every branch
            # and the loop just advances past it — no error is reported.
            if token == 'let':
                append_node(root, self.compileLet())
            elif token == 'if':
                append_node(root, self.compileIf())
            elif token == 'while':
                append_node(root, self.compileWhile())
            elif token == 'do':
                append_node(root, self.compileDo())
            elif token == 'return':
                append_node(root, self.compileReturn())
            self.t.advance()

    def compileDo(self):
        """Parse ``'do' (className '.')? subroutineName '(' exprList ')' ';'``.

        Entered on the 'do' keyword; leaves the tokenizer on ';'.
        """
        root = self.new_node('doStatement')
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))  # 'do'
        self.t.advance()
        expect_assert(TokenType.IDENTIFIER, self.t.tokenType())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        if self.t.token() == '.':
            # Qualified call: className-or-varName '.' subroutineName.
            append_node(
                root, self.new_node(self.t.tokenType().value, self.t.token()))
            self.t.advance()
            expect_assert(TokenType.IDENTIFIER, self.t.tokenType())
            append_node(
                root, self.new_node(self.t.tokenType().value, self.t.token()))
            self.t.advance()
        expect_assert("(", self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        # compileExpressionList returns with the tokenizer on ')'.
        append_node(root, self.compileExpressionList())
        expect_assert(")", self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        expect_assert(';', self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        return root

    def compileLet(self):
        """Parse ``'let' varName ('[' expression ']')? '=' expression ';'``.

        Entered on the 'let' keyword; leaves the tokenizer on ';'.
        """
        root = self.new_node('letStatement')
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))  # 'let'
        self.t.advance()
        # Target variable name.
        expect_assert(TokenType.IDENTIFIER, self.t.tokenType())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        if self.t.token() == '[':
            # Array subscript: '[' expression ']'.
            append_node(
                root, self.new_node(self.t.tokenType().value, self.t.token()))
            self.t.advance()
            append_node(root, self.compileExpression())
            expect_assert(']', self.t.token())
            append_node(
                root, self.new_node(self.t.tokenType().value, self.t.token()))
            self.t.advance()
        expect_assert("=", self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        # Right-hand side; compileExpression leaves the tokenizer on the
        # token following the expression, which must be ';'.
        append_node(root, self.compileExpression())
        expect_assert(';', self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        return root

    def compileWhile(self):
        """Parse ``'while' '(' expression ')' '{' statements '}'``.

        Entered on the 'while' keyword; leaves the tokenizer on '}'.
        """
        root = self.new_node('whileStatement')
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))  # 'while'
        self.t.advance()
        expect_assert("(", self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        append_node(root, self.compileExpression())
        expect_assert(")", self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        expect_assert("{", self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        # compileStatements back()s onto the token before '}', hence the
        # advance() to land on the closing brace.
        append_node(root, self.compileStatements())
        self.t.advance()
        expect_assert("}", self.t.token())
        # Closing brace.
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        return root

    def compileReturn(self):
        """Parse ``'return' expression? ';'``.

        Entered on the 'return' keyword; leaves the tokenizer on ';'.
        """
        root = self.new_node('returnStatement')
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))  # 'return'
        self.t.advance()
        if self.t.token() != ';':
            # Optional return-value expression.
            append_node(root, self.compileExpression())
        expect_assert(';', self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        return root

    def compileIf(self):
        """Parse ``'if' '(' expr ')' '{' stmts '}' ('else' '{' stmts '}')?``.

        Entered on the 'if' keyword. A one-token lookahead decides whether
        an 'else' clause follows; if not, back() restores the position so
        the statement ends on its closing '}'.
        """
        root = self.new_node('ifStatement')
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))  # 'if'
        self.t.advance()
        expect_assert("(", self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        append_node(root, self.compileExpression())
        expect_assert(")", self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        expect_assert("{", self.t.token())
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        append_node(root, self.compileStatements())
        self.t.advance()
        expect_assert("}", self.t.token())
        # Closing brace of the then-block.
        append_node(root,
                    self.new_node(self.t.tokenType().value, self.t.token()))
        self.t.advance()
        if self.t.token() == 'else':
            append_node(root,
                        self.new_node(self.t.tokenType().value, self.t.token()))  # 'else'
            self.t.advance()
            expect_assert("{", self.t.token())
            append_node(
                root, self.new_node(self.t.tokenType().value, self.t.token()))
            self.t.advance()
            append_node(root, self.compileStatements())
            self.t.advance()
            expect_assert("}", self.t.token())
            # Closing brace of the else-block.
            append_node(
                root, self.new_node(self.t.tokenType().value, self.t.token()))
        else:
            # No else-clause: undo the lookahead.
            self.t.back()
        return root

    def compileExpression(self):
        """Parse ``term (op term)?``.

        Returns with the tokenizer on the first token AFTER the expression.
        NOTE(review): only a single binary operator is consumed — inputs
        like ``a + b + c`` appear unsupported; the Jack grammar allows
        ``term (op term)*``. Confirm against the language spec / test files.
        """
        root = self.new_node('expression')
        append_node(root, self.compileTerm())
        self.t.advance()
        if self.t.token() in self._ops:
            append_node(root,
                        self.new_node(self.t.tokenType().value, self.t.token()))  # binary op
            self.t.advance()
            append_node(root, self.compileTerm())
            self.t.advance()
        return root

    def compileTerm(self):
        """Parse a single term.

        Handles: identifier (optionally with '[expr]' subscript or a
        '.subroutine(exprList)' call), string/int constants, keyword
        constants, a parenthesized expression, and unary-op term.
        Entered on the term's first token; returns with the tokenizer on
        the term's LAST token (back() undoes the identifier lookahead).
        Raises ValueError on an unrecognizable token.
        """
        root = self.new_node('term')
        if self.t.tokenType() == TokenType.IDENTIFIER:
            root.append(self.new_node(self.t.tokenType().value, self.t.token()))
            self.t.advance()
            # One-token lookahead: '[', '.', or neither.
            if self.t.token() == '[':
                # Array subscript: '[' expression ']'.
                append_node(root,
                            self.new_node(self.t.tokenType().value, self.t.token()))  # '['
                self.t.advance()
                append_node(root, self.compileExpression())
                expect_assert(']', self.t.token())
                append_node(
                    root, self.new_node(self.t.tokenType().value, self.t.token()))
            elif self.t.token() == '.':
                # Qualified subroutine call.
                append_node(root,
                            self.new_node(self.t.tokenType().value, self.t.token()))  # '.'
                self.t.advance()
                append_node(root,
                            self.new_node('identifier', self.t.token()))  # subroutine name
                self.t.advance()
                expect_assert("(", self.t.token())
                # Opening '('.
                append_node(
                    root, self.new_node(self.t.tokenType().value, self.t.token()))
                self.t.advance()
                append_node(root, self.compileExpressionList())
                expect_assert(")", self.t.token())
                # Closing ')'.
                append_node(
                    root, self.new_node(self.t.tokenType().value, self.t.token()))
            else:
                # Plain variable reference: undo the lookahead.
                self.t.back()
        elif self.t.tokenType() == TokenType.STRING_CONST:
            root.append(self.new_node(self.t.tokenType().value, self.t.token()))
        elif self.t.tokenType() == TokenType.INT_CONST:
            root.append(self.new_node(self.t.tokenType().value, self.t.token()))
        elif self.t.tokenType() == TokenType.KEYWORD:
            # Keyword constants (true/false/null/this) land here generically.
            root.append(self.new_node(self.t.tokenType().value, self.t.token()))
        elif self.t.token() == '(':
            # Parenthesized sub-expression.
            append_node(
                root, self.new_node(self.t.tokenType().value, self.t.token()))
            self.t.advance()
            append_node(root, self.compileExpression())
            expect_assert(")", self.t.token())
            append_node(
                root, self.new_node(self.t.tokenType().value, self.t.token()))
        elif self.t.token() in self._unaryOps:
            # Unary '-' or '~' applied to the following term.
            append_node(root,
                        self.new_node(self.t.tokenType().value, self.t.token()))  # unary operation
            self.t.advance()
            append_node(root, self.compileTerm())
        else:
            raise ValueError(f"What the f**k is this: {self.t.token()}")
        return root

    def compileExpressionList(self):
        """Parse ``(expression (',' expression)*)?``.

        Entered on the first token after '('; returns with the tokenizer on
        the closing ')'. NOTE(review): if the token after an expression is
        neither ',' nor ')', the loop re-parses from the same position —
        potential infinite loop on malformed input; confirm.
        """
        root = self.new_node("expressionList")
        while True:
            if self.t.token() == ')':
                return root
            append_node(root, self.compileExpression())
            if self.t.token() == ',':
                root.append(
                    self.new_node(self.t.tokenType().value, self.t.token()))
                self.t.advance()