Ejemplo n.º 1
0
 def __init__(self, tokenizer):
     """Set up the compiler state, then compile the class and close.

     tokenizer -- tokenizer over a tokenized .jack file that should contain
                  exactly one class; it must have at least one token left.
     """
     # output base name: the tokenizer's filename with '.jack' stripped out
     base_name = tokenizer.get_filename().replace('.jack','')
     self._name = base_name
     # token source for the input
     self._tokenizer = tokenizer
     # symbol table
     self._symbols = SymbolTable()
     # vm output file, named after the input
     self._writer = VMWriter(base_name + '.vm')
     # an input without any token cannot contain a class
     assert self._tokenizer.has_more_tokens()
     self._tokenizer.advance()
     # state that starts out empty (presumably filled in by the compile_*
     # methods -- they are not visible from here)
     self._class = self._subroutine = None
     self._counter = 0
     # the whole compilation is driven from the constructor
     self.compile_class()
     self.close()
Ejemplo n.º 2
0
   def __init__(self, source_filename):
      """Open the output file and build the helpers for one source file.

      source_filename -- path of the file to compile. If it ends in ".jack"
                         (case-insensitive) the output path swaps that
                         extension for ".vm"; otherwise ".vm" is appended.

      Any exception raised while building the tokenizer, symbol table or
      VM writer is re-raised after the output file has been closed.
      """
      # destination filename
      if source_filename.lower().endswith(".jack"):
         destination_filename = source_filename[:-5] + ".vm"
      else:
         destination_filename = source_filename + ".vm"

      # open the destination filename for writing
      self.destination_file = open(destination_filename, 'w')

      try:
         # create a tokenizer for the input file
         self.tokenizer = JackTokenizer(source_filename)

         # create the symbol table
         self.symbol_table = SymbolTable()

         # create the vm writer
         self.vm_writer = VMWriter(self.destination_file)
      except Exception:
         # do not leak the open handle when a collaborator cannot be built
         self.destination_file.close()
         raise
Ejemplo n.º 3
0
class CompilationEngine(object):
   """Recursive-descent compiler that turns one Jack class into VM code.

   The engine reads tokens from a JackTokenizer, records identifiers in a
   SymbolTable and emits VM commands through a VMWriter. After construction
   the next method called must be compile_class(), which compiles the whole
   class and closes the output file.

   Token convention used by the compile_* methods: each method expects the
   tokenizer to be positioned ON the first token of its construct and leaves
   it positioned on the first token AFTER the construct, unless its comment
   says otherwise.
   """

   # the destination file for writing
   destination_file = None

   # the tokenizer for the input file
   tokenizer = None

   # symbol table
   symbol_table = None

   # vm writer
   vm_writer = None

   # the class name
   class_name = ""

   # indices for if and while labels
   # start at -1 because we increment before use
   while_index = -1
   if_index = -1

   # binary operators that map directly onto a VM arithmetic command
   _ARITHMETIC_OPS = {
      "+": "add",
      "-": "sub",
      "=": "eq",
      ">": "gt",
      "<": "lt",
      "&": "and",
      "|": "or",
   }

   # binary operators that are implemented by a two-argument function call
   _CALL_OPS = {
      "*": "Math.multiply",
      "/": "Math.divide",
   }

   def __init__(self, source_filename):
      """Open the output file and build the helpers for one source file.

      source_filename -- a single file (not a directory). If it ends in
                         ".jack" (case-insensitive) the output path swaps
                         that extension for ".vm"; otherwise ".vm" is
                         appended.

      Any exception raised while building the tokenizer, symbol table or
      VM writer is re-raised after the output file has been closed.
      """
      if source_filename.lower().endswith(".jack"):
         destination_filename = source_filename[:-5] + ".vm"
      else:
         destination_filename = source_filename + ".vm"

      # open the destination filename for writing
      self.destination_file = open(destination_filename, 'w')

      try:
         # create a tokenizer for the input file
         self.tokenizer = JackTokenizer(source_filename)

         # create the symbol table
         self.symbol_table = SymbolTable()

         # create the vm writer
         self.vm_writer = VMWriter(self.destination_file)
      except Exception:
         # do not leak the open handle when a collaborator cannot be built
         self.destination_file.close()
         raise

   def compile_class(self):
      """Compile a complete class and close the output file.

      The output file is closed even when compilation stops early (for
      example when _token_next aborts on an unexpected token).
      """
      try:
         # class keyword
         self._token_next(True, "KEYWORD", "class")

         # name of class
         tt, t = self._token_next(True, "IDENTIFIER")
         self.class_name = t

         # open brace
         self._token_next(True, "SYMBOL", "{")

         # zero or more class variable declarations
         self.tokenizer.advance()
         while True:
            tt, t = self._token_next(False)
            if tt == "KEYWORD" and t in ("field", "static"):
               self.compile_class_var_dec()
            else:
               # stop trying to process variable declarations
               break

         # zero or more subroutine declarations
         while True:
            tt, t = self._token_next(False)
            if tt == "KEYWORD" and t in ("constructor", "function", "method"):
               self.compile_subroutine()
            else:
               # stop trying to process subroutines
               break

         # close brace
         # do not advance because the last loop already left us on it
         self._token_next(False, "SYMBOL", "}")
      finally:
         # done with compilation; close the output file
         self.destination_file.close()

   def compile_class_var_dec(self):
      """Compile a static or field declaration."""
      # False means this is a class-level (not subroutine-level) declaration
      self.compile_var_dec(False)

   def compile_subroutine(self):
      """Compile a complete method, function, or constructor."""
      # start of subroutine
      self.symbol_table.start_subroutine()

      # constructor, function, or method keyword
      tt, subroutine_kind = self._token_next(False, "KEYWORD")

      # type of the return value: a keyword (void, int, ...) or an identifier
      # it is not needed for code generation, so it is only skipped over
      self._token_next(True)

      # name of the method/function/constructor
      tt, name = self._token_next(True)
      name = self.class_name + "." + name

      # if this is a method, "define" this as an argument, so the other
      # argument indexes work correctly
      if subroutine_kind == "method":
         self.symbol_table.define("this", self.class_name,
                                  self.symbol_table.ARG)

      # opening parenthesis
      self._token_next(True, "SYMBOL", "(")

      # arguments
      self.tokenizer.advance()
      self.compile_parameter_list()

      # closing parenthesis
      self._token_next(False, "SYMBOL", ")")

      # opening brace
      self._token_next(True, "SYMBOL", "{")

      # variable declarations
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t == "var":
            self.compile_var_dec()
         else:
            # stop trying to process variable declarations
            break

      # write the function; this must come after the var declarations
      # because the number of locals is part of the function command
      num_locals = self.symbol_table.var_count(self.symbol_table.VAR)
      self.vm_writer.write_function(name, num_locals)

      # write any special code at the top of the function
      if subroutine_kind == "constructor":
         # code to allocate memory and set "this"
         size = self.symbol_table.var_count(self.symbol_table.FIELD)
         self.vm_writer.write_push(self.vm_writer.CONST, size)
         self.vm_writer.write_call("Memory.alloc", 1)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
      elif subroutine_kind == "function":
         # nothing special
         pass
      elif subroutine_kind == "method":
         # put argument 0 into pointer 0 (this)
         self.vm_writer.write_push(self.vm_writer.ARG, 0)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
      else:
         print("WARNING: Expected constructor, function, or method; got %s"
               % subroutine_kind)

      # statements
      self.compile_statements()

      # closing brace
      self._token_next(False, "SYMBOL", "}")

      self.tokenizer.advance()

   def compile_parameter_list(self):
      """Compile a (possibly empty) parameter list, without the parentheses.

      Every parameter is defined in the symbol table as an argument. On
      return the tokenizer is on the token that ended the list.
      """
      # check for empty list
      tt, t = self._token_next(False)
      if tt == "SYMBOL" and t == ")":
         # the parameter list was empty; do not process any more
         return

      # there are things in the parameter list
      while True:
         # keyword or identifier (variable type)
         tt, param_type = self._token_next(False)

         # identifier (variable name)
         tt, name = self._token_next(True)

         # parameters are always of kind "argument"
         self.symbol_table.define(name, param_type, self.symbol_table.ARG)

         # possible comma
         tt, t = self._token_next(True)
         if tt != "SYMBOL" or t != ",":
            # not a comma; stop processing parameters
            break

         self.tokenizer.advance()

   def compile_var_dec(self, subroutine=True):
      """Compile a variable declaration, including the trailing semicolon.

      subroutine -- if true, only the var keyword is expected; if false,
                    only static and field are expected. An unexpected
                    keyword produces a warning and is then passed to the
                    symbol table unchanged as the kind.
      """
      # the keyword to start the declaration
      tt, kind = self._token_next(False, "KEYWORD")

      # translate the keyword into a symbol table kind
      if subroutine:
         if kind == "var":
            kind = self.symbol_table.VAR
         else:
            print("WARNING: expecting var, but received %s" % (str(kind)))
      else:
         if kind == "static":
            kind = self.symbol_table.STATIC
         elif kind == "field":
            kind = self.symbol_table.FIELD
         else:
            print("WARNING: expecting static or field, but received %s"
                  % (str(kind)))

      # type of the declaration
      # could be an identifier or a keyword (int, etc)
      tt, var_type = self._token_next(True)

      # name of the declaration
      tt, name = self._token_next(True, "IDENTIFIER")

      # define the variable in the symbol table
      self.symbol_table.define(name, var_type, kind)

      # more names may follow, separated by commas; they share type and kind
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "SYMBOL" and t == ",":
            # another variable name follows
            tt, name = self._token_next(True, "IDENTIFIER")

            # define the variable in the symbol table
            self.symbol_table.define(name, var_type, kind)

            self.tokenizer.advance()
         else:
            # no more variable names
            break

      # should be on the semicolon at the end of the line
      self._token_next(False, "SYMBOL", ";")

      self.tokenizer.advance()

   def compile_statements(self):
      """Compile a sequence of statements, not including the enclosing {}."""
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t in ("do", "let", "while", "return", "if"):
            # dispatch to compile_do, compile_let, ... by keyword
            getattr(self, "compile_" + t)()
         else:
            # not a statement; stop processing statements
            break

   def compile_do(self):
      """Compile a do statement."""
      # do keyword
      self._token_next(False, "KEYWORD", "do")

      # subroutine call
      self.tokenizer.advance()
      self.compile_subroutine_call()

      # the value returned by the call is not used by a do statement, so
      # remove it from the stack
      self.vm_writer.write_pop(self.vm_writer.TEMP, 0)

      # semicolon
      self._token_next(False, "SYMBOL", ";")

      self.tokenizer.advance()

   def compile_let(self):
      """Compile a let statement (plain variable or array element target)."""
      # let keyword
      self._token_next(False, "KEYWORD", "let")

      # variable name
      tt, name = self._token_next(True, "IDENTIFIER")

      # possible brackets for array
      tt, t = self._token_next(True)
      array = tt == "SYMBOL" and t == "["
      if array:
         # array - write operation

         # compile the offset expression
         self.tokenizer.advance()
         self.compile_expression()

         # write the base address onto the stack
         segment, index = self._resolve_symbol(name)
         self.vm_writer.write_push(segment, index)

         # add base and offset
         self.vm_writer.write_arithmetic("add")

         # we cannot yet put the result into pointer 1, since the right-hand
         # side (which has not been compiled yet) may itself use pointer 1
         # to read an array value

         # closing bracket
         self._token_next(False, "SYMBOL", "]")

         # move on to the = that the code below expects
         self.tokenizer.advance()

      # equals sign
      self._token_next(False, "SYMBOL", "=")

      # expression
      self.tokenizer.advance()
      self.compile_expression()

      if array:
         # our stack now looks like this:
         #    TOP OF STACK
         #    computed result to store
         #    address in which value should be stored
         #    ... previous stuff ...

         # pop the computed value to temp 0
         self.vm_writer.write_pop(self.vm_writer.TEMP, 0)

         # pop the array address to pointer 1 (that)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 1)

         # put the computed value back onto the stack
         self.vm_writer.write_push(self.vm_writer.TEMP, 0)

         # store it through the array reference
         self.vm_writer.write_pop(self.vm_writer.THAT, 0)
      else:
         # not an array - pop the expression to the variable
         segment, index = self._resolve_symbol(name)
         self.vm_writer.write_pop(segment, index)

      # semicolon
      self._token_next(False, "SYMBOL", ";")

      self.tokenizer.advance()

   def compile_while(self):
      """Compile a while statement."""
      # labels for this while loop; they are captured in locals so nested
      # loops, which bump the counter again, cannot change them
      self.while_index += 1
      while_start = "WHILE_START_%d" % (self.while_index)
      while_end = "WHILE_END_%d" % (self.while_index)

      # while keyword
      self._token_next(False, "KEYWORD", "while")

      # opening parenthesis
      self._token_next(True, "SYMBOL", "(")

      # label for the start of the while statement
      self.vm_writer.write_label(while_start)

      # the expression that is the condition of the while statement
      self.tokenizer.advance()
      self.compile_expression()

      # the closing parenthesis
      self._token_next(False, "SYMBOL", ")")

      # the result of the evaluation is now on the stack
      # if false, then goto the end of the loop
      # to do this, negate and then call if-goto
      self.vm_writer.write_arithmetic("not")
      self.vm_writer.write_if(while_end)

      # the opening brace
      self._token_next(True, "SYMBOL", "{")

      # the statements that are the body of the while loop
      self.tokenizer.advance()
      self.compile_statements()

      # the closing brace
      self._token_next(False, "SYMBOL", "}")

      # after the last statement of the while loop
      # need to jump back up to the top of the loop to evaluate again
      self.vm_writer.write_goto(while_start)

      # label at the end of the loop
      self.vm_writer.write_label(while_end)

      self.tokenizer.advance()

   def compile_return(self):
      """Compile a return statement.

      A return without an expression pushes constant 0 before the return
      command, so a value is pushed in both cases.
      """
      # return keyword
      self._token_next(False, "KEYWORD", "return")

      # possible expression to return
      # only a ";" directly after the keyword means "no expression"; other
      # symbols such as "-", "~" or "(" start an expression
      tt, t = self._token_next(True)
      if tt == "SYMBOL" and t == ";":
         # no return expression; return 0
         self.vm_writer.write_push(self.vm_writer.CONST, 0)
      else:
         self.compile_expression()

      # ending semicolon
      self._token_next(False, "SYMBOL", ";")

      self.vm_writer.write_return()

      self.tokenizer.advance()

   def compile_if(self):
      """Compile an if statement, including a possible trailing else."""
      # it is more efficient in an if-else case to have the else portion first
      # in the code when testing, but we use the less-efficient but
      # easier-to-write true-false pattern here

      # labels for this if statement
      self.if_index += 1
      if_false = "IF_FALSE_%d" % (self.if_index)
      if_end = "IF_END_%d" % (self.if_index)

      # if keyword
      self._token_next(False, "KEYWORD", "if")

      # opening parenthesis
      self._token_next(True, "SYMBOL", "(")

      # expression of if statement
      self.tokenizer.advance()
      self.compile_expression()

      # closing parenthesis
      self._token_next(False, "SYMBOL", ")")

      # the result of the evaluation is now on the stack
      # if false, then goto the false label
      # if true, fall through to executing code
      self.vm_writer.write_arithmetic("not")
      self.vm_writer.write_if(if_false)

      # opening brace
      self._token_next(True, "SYMBOL", "{")

      # statements for true portion
      self.tokenizer.advance()
      self.compile_statements()

      # closing brace
      self._token_next(False, "SYMBOL", "}")

      # look at the token after the brace to see whether an else follows
      tt, t = self._token_next(True)
      if tt == "KEYWORD" and t == "else":
         # else statement exists

         # goto the end of the if statement at the end of the true portion
         self.vm_writer.write_goto(if_end)

         # label for the start of the false portion
         self.vm_writer.write_label(if_false)

         # opening brace
         self._token_next(True, "SYMBOL", "{")

         # statements
         self.tokenizer.advance()
         self.compile_statements()

         # closing brace
         self._token_next(False, "SYMBOL", "}")

         # end label
         self.vm_writer.write_label(if_end)

         # advance only in the else case; without an else, the look-ahead
         # above already moved past the closing brace
         self.tokenizer.advance()
      else:
         # no else portion; only put in a label for false, since end is not
         # used
         self.vm_writer.write_label(if_false)

   def compile_expression(self):
      """Compile an expression: one or more terms connected by operators.

      Operators are applied strictly left to right in the order they are
      read; no operator precedence is implemented here.
      """
      # the first term
      self.compile_term()

      # finish any number of operators followed by terms
      while True:
         tt, op = self._token_next(False)
         is_operator = tt == "SYMBOL" and (
            op in self._ARITHMETIC_OPS or op in self._CALL_OPS)
         if not is_operator:
            # no operator found; done parsing the expression
            break

         # postfix order - emit the next term and then the operator
         self.tokenizer.advance()
         self.compile_term()

         if op in self._ARITHMETIC_OPS:
            self.vm_writer.write_arithmetic(self._ARITHMETIC_OPS[op])
         else:
            self.vm_writer.write_call(self._CALL_OPS[op], 2)

   def compile_term(self):
      """Compile a single term.

      A term is one of: integer_constant | string_constant |
      keyword_constant | varname | varname[expression] | subroutine_call |
      (expression) | unary_op term.

      When the current token is an identifier, one token of look-ahead
      ("[", "(" or ".") decides between a variable, an array entry and a
      subroutine call. Any other look-ahead token is not part of the term
      and is left as the current token.
      """
      tt, t = self._token_next(False)
      if tt == "INT_CONST":
         self.vm_writer.write_push(self.vm_writer.CONST, t)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "STRING_CONST":
         # after this branch a pointer to a string is on the stack: a new
         # string of the right size is created and the characters are
         # appended one by one; each append leaves the string pointer on
         # the stack again
         self.vm_writer.write_push(self.vm_writer.CONST, len(t))
         self.vm_writer.write_call("String.new", 1)

         # append each character
         for char in t:
            self.vm_writer.write_push(self.vm_writer.CONST, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "KEYWORD":
         if t == "true":
            # true is -1, which is 0 negated
            self.vm_writer.write_push(self.vm_writer.CONST, 0)
            self.vm_writer.write_arithmetic("not")
         elif t == "false" or t == "null":
            self.vm_writer.write_push(self.vm_writer.CONST, 0)
         elif t == "this":
            self.vm_writer.write_push(self.vm_writer.POINTER, 0)
         else:
            # nothing is pushed for other keywords, so say so instead of
            # silently producing unbalanced code
            print("WARNING: Unexpected keyword in term: %s" % t)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "SYMBOL" and t == "(":
         # ( expression )

         # parse the expression
         self.tokenizer.advance()
         self.compile_expression()

         # closing parenthesis
         self._token_next(False, "SYMBOL", ")")

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "SYMBOL" and t in ("-", "~"):
         # unary_op term
         # postfix order - emit the operand and then the operator

         # parse the rest of the term
         self.tokenizer.advance()
         self.compile_term()

         # write the unary operation
         if t == "-":
            self.vm_writer.write_arithmetic("neg")
         else:
            self.vm_writer.write_arithmetic("not")
      elif tt == "IDENTIFIER":
         # varname, varname[expression], subroutine_call

         # look one token ahead before emitting anything for the identifier
         tt2, t2 = self._token_next(True)

         if tt2 == "SYMBOL" and t2 in ("(", "."):
            # subroutine call
            # back up onto the identifier and compile the call from there
            self.tokenizer.retreat()

            self.compile_subroutine_call()
         elif tt2 == "SYMBOL" and t2 == "[":
            # array - read operation

            # write the base address onto the stack
            segment, index = self._resolve_symbol(t)
            self.vm_writer.write_push(segment, index)

            # compile the offset expression
            self.tokenizer.advance()
            self.compile_expression()

            # add base and offset
            self.vm_writer.write_arithmetic("add")

            # put the resulting address into pointer 1 (that)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 1)

            # read from that 0 onto the stack
            self.vm_writer.write_push(self.vm_writer.THAT, 0)

            # closing bracket
            self._token_next(False, "SYMBOL", "]")

            # advance for the next statement
            self.tokenizer.advance()
         else:
            # none of above - just a single identifier; the look-ahead
            # token is already the current token, so do not advance
            segment, index = self._resolve_symbol(t)
            self.vm_writer.write_push(segment, index)
      else:
         # unknown
         print("WARNING: Unknown term expression object: %s %s" % (tt, t))

   def compile_expression_list(self):
      """Compile a (possibly empty) comma-separated list of expressions.

      Returns the number of expressions compiled.
      """
      num_args = 0

      # check for empty list
      tt, t = self._token_next(False)
      if tt == "SYMBOL" and t == ")":
         # the list was empty; do not process any more
         return num_args

      while True:
         # expression to pass
         self.compile_expression()
         num_args += 1

         # possible comma
         tt, t = self._token_next(False)
         if tt == "SYMBOL" and t == ",":
            self.tokenizer.advance()
         else:
            # not a comma; stop processing expressions
            break

      return num_args

   def compile_subroutine_call(self):
      """Compile a subroutine call.

      Two syntactic forms:
      - subroutineName(expressionList)
      - (class|var).subroutineName(expressionList)

      Three resulting kinds of call:
      - func()       - method call on the current object
      - var.func()   - method call on the object held by var
      - class.func() - function call, no hidden argument
      """
      # first part of name
      tt, name1 = self._token_next(False, "IDENTIFIER")

      # a dot and another name may exist, or it could be a parenthesis
      name2 = None
      tt, t = self._token_next(True)
      if tt == "SYMBOL" and t == ".":
         # the name after the dot
         tt, name2 = self._token_next(True, "IDENTIFIER")

         # advance so that we are on the parenthesis
         self.tokenizer.advance()

      # classify the call; the unqualified form is checked first so that a
      # variable which happens to share the subroutine's name cannot turn
      # it into a "var.None" call
      local_call = name2 is None
      variable_call = not local_call and self.symbol_table.contains(name1)

      # push the hidden "this" argument for method calls
      if local_call:
         # the current object
         self.vm_writer.write_push(self.vm_writer.POINTER, 0)
      elif variable_call:
         # the object held by the variable
         segment, index = self._resolve_symbol(name1)
         self.vm_writer.write_push(segment, index)

      # opening parenthesis
      self._token_next(False, "SYMBOL", "(")

      # expression list
      self.tokenizer.advance()
      num_args = self.compile_expression_list()

      # closing parenthesis
      self._token_next(False, "SYMBOL", ")")

      # work out the name of the vm function to call
      if local_call:
         vm_function_name = self.class_name + "." + name1
         # one more argument for the hidden "this"
         num_args += 1
      elif variable_call:
         # the class is the declared type of the variable
         classname = self.symbol_table.get(name1)[1]
         vm_function_name = classname + "." + name2
         # one more argument for the hidden "this"
         num_args += 1
      else:
         vm_function_name = name1 + "." + name2

      # make the call
      self.vm_writer.write_call(vm_function_name, num_args)

      self.tokenizer.advance()

   def _token_next(self, advance=False, expected_type=None, expected_value=None):
      """Return (token_type, token) for the current token.

      advance        -- if true, advance the tokenizer before reading
      expected_type  -- if given, the required token type
      expected_value -- if given, the required token text

      The token text is read from the tokenizer method whose name is the
      lower-cased token type and is always returned as a string. If an
      expectation is not met, a message and the call stack are printed and
      the process exits with status 1.
      """
      # advance the tokenizer, if requested
      if advance:
         self.tokenizer.advance()

      # get the token type and the token itself
      token_type = self.tokenizer.token_type()
      token = str(getattr(self.tokenizer, token_type.lower())())

      if expected_type and token_type != expected_type:
         self._fail("WARNING: Type %s found; expected %s"
                    % (token_type, expected_type))
      if expected_value and token != expected_value:
         self._fail("WARNING: Value %s found; expected %s"
                    % (token, expected_value))

      return token_type, token

   def _fail(self, message):
      """Print message and the current call stack, then exit with status 1."""
      import sys
      import traceback
      print(message)
      traceback.print_stack()
      sys.exit(1)

   def _type_to_segment(self, type):
      """Convert a symbol table kind into a vm writer segment.

      Returns None (after printing an error) for an unknown kind.
      """
      if type == self.symbol_table.STATIC:
         return self.vm_writer.STATIC
      elif type == self.symbol_table.FIELD:
         return self.vm_writer.THIS
      elif type == self.symbol_table.ARG:
         return self.vm_writer.ARG
      elif type == self.symbol_table.VAR:
         return self.vm_writer.LOCAL

      print("ERROR: Bad type %s" % (str(type)))
      return None

   def _resolve_symbol(self, name):
      """Look name up in the symbol table; return (segment, index)."""
      kind, _declared_type, index = self.symbol_table.get(name)
      return self._type_to_segment(kind), index
Ejemplo n.º 4
0
	def __init__(self, infile, outfile):
		"""Create the helper objects used by this engine.

		infile is passed to JackToken and outfile to VMWriter; a new
		SymbolTable is created as well.
		"""
		self.writer = VMWriter(outfile)
		self.token = JackToken(infile)
		self.table = SymbolTable()
Ejemplo n.º 5
0
class CompilationEngine:
	#------------------------------------------------------------------------------
	# Var Declar:
	#------------------------------------------------------------------------------
	
	#stores all the different key words
	key_class='CLASS'
	key_method='METHOD'
	key_function='FUNCTION'
	key_constructor='CONSTRUCTOR'
	key_int='INT'
	key_boolean='BOOLEAN'
	key_char='CHAR'
	key_void='VOID'
	key_var='VAR'
	key_static='STATIC'
	key_field='FIELD'
	key_let='LET'
	key_do='DO'
	key_if='IF'
	key_else='ELSE'
	key_while='WHILE'
	key_return='RETURN'
	key_true='TRUE'
	key_false='FALSE'
	key_null='NULL'
	key_this='THIS'
	
	#stores all the token types
	keyword='KEYWORD'
	sym='SYMBOL'
	ident='IDENTIFIER'
	intc='INT_CONST'
	string_c='STRING_CONST'

	#Maps each Jack variable kind to the VM segment used to address it
	segment = {'VAR':'local', 'STATIC':'static', 'FIELD':'this', 'ARG':'argument'}

	#Counters used to build unique labels for loops and if/else statements
	loopCounter = 0
	ifCounter = 0

	#--------------------------------------------------------------------------
	# Class declaration:
	#--------------------------------------------------------------------------

	#------------------------------------------------------------------------------
	# This is the constructor
	def __init__(self, infile, outfile):
		"""Create the VM writer, the tokenizer and an empty symbol table.

		outfile is handed to VMWriter and infile to JackToken.
		"""
		# construction order is kept: writer, then tokenizer, then table
		self.writer = VMWriter(outfile)
		self.token = JackToken(infile)
		self.table = SymbolTable()
	
	#------------------------------------------------------------------------------
	# This method compiles the entire class contained in the input file
	def compileClass(self):
		self.token.advance()

		while self.token.hasMoreTokens():
			tokentype = self.token.tokenType()

			if self.keyword in tokentype:
				tempkey = self.token.keyWord()

				if self.key_class in tempkey:
					s = "Place holder nothing to do here"
				
				#if the keyword is static or field then it is known that it is a class var dec
				#at this level of compilation
				elif self.key_static in tempkey or self.key_field in tempkey:
					self.compileClassVarDec()
					continue #continue because there maybe more then one class var and don't want to advane tokenizer

				#if the keyword is a subroutine type
				elif self.key_constructor in tempkey or self.key_method in tempkey or self.key_function in tempkey:
					self.compileSubroutine()

			elif self.sym in tokentype:
				tempsym = self.token.symbol()

				#if we run into } at this level then we are at the end of the class
				if '}' in tempsym:
					break


			elif self.ident in tokentype:
				tempident = self.token.identifier()

				#stores the name of the class we are in for calling methods from
				#with in this class and for other things as well
				self.currClassName = tempident

			self.token.advance()

		self.writer.close()

	#------------------------------------------------------------------------------
	# This method compiles class var dec
	def compileClassVarDec(self):
		curtype = ""
		curkind = ""
		curname = ""

		while self.token.hasMoreTokens:
			tokentype = self.token.tokenType()

			if self.keyword in tokentype:
				tempkey = self.token.keyWord()

				if self.key_int in tempkey or self.key_char in tempkey or self.key_boolean in tempkey:
					curtype = tempkey

				elif self.key_static in tempkey or self.key_field in tempkey:
					curkind = tempkey

				#if we run into a subroutine declaration then we break
				elif self.key_function in tempkey or self.key_method in tempkey or self.key_constructor in tempkey:
					break
					
			elif self.ident in tokentype:
				tempident = self.token.identifier()

				#if the curtype string is empty then its type is an object
				if len(curtype) == 0:
					curtype = tempident
				else:
					curname = tempident

			elif self.sym in tokentype:
				tempsym = self.token.symbol()

				#if it runs into any of the below symboles then it is an invalid var decleration
				if re.search('[\(\)\{\}\[\]\.\+\-\*\/\&\<\>\=\~]{1}',tempsym) is not None:
					print(self.token.errorMsg())
					sys.exit(0)

				#if we run into a ; then it is the end of this particular class var dec
				if ';' in tempsym:
					#want to advance past ; so the calling method can do the proper checks
					self.token.advance()

					self.table.Define(curname,curtype,curkind)
					break

				self.table.Define(curname,curtype,curkind)

				#clears the curname for cases like 'FIELD int haberdash, x, y' all have same
				#type and kind but different names
				curname = ''

			self.token.advance()

	#------------------------------------------------------------------------------
	# This method compiles the subroutines
	def compileSubroutine(self):
		self.table.startSubroutine()
		self.curSubType = ''
		if_param = False #ensures that at least an empty param list is discovered

		#this is to tell other methods that the current block being read in is a constructor and to take
		#the appropriate actions
		self.isConstruct = False

		isFunct = False

		while self.token.hasMoreTokens:
			tokentype = self.token.tokenType()

			if self.keyword in tokentype:
				tempkey = self.token.keyWord()

				if self.key_method in tempkey or self.key_function in tempkey or self.key_constructor in tempkey:
					#sets isConstruct to true if the keyword is constructor or false other wise
					self.isConstruct = True if self.key_constructor in tempkey else False

					#sets isFunct to true if the keyword is function or false other wise
					isFunct = True if self.key_function in tempkey else False

				elif self.key_int in tempkey or self.key_char in tempkey or self.key_boolean in tempkey or self.key_void in tempkey:
					self.curSubType = tempkey

				#if the keyward var is in tempkey then we need to compile a vardeck
				elif self.key_var in tempkey:
					self.compileVarDec()

				#if it runs into any keywords that aren't caught by the above statements then it is no longer
				#in a subroutine
				else:
					self.writer.writeFunction(self.currClassName+'.'+self.curSubName,self.table.varCount('VAR'))
					break

			elif self.sym in tokentype:
				tempsym = self.token.symbol()

				#if it runs into a ( then it is descovering a parameter list
				if '(' in tempsym:
					self.token.advance()

					self.compileParameterList(self.isConstruct or isFunct)

					if_param = True #set param list discovered to true

				#if it has fond at lest an empty paramlist then it can print the next symboles 
				elif if_param:
					s = "this is does nothing just place holeder"

				#error
				else:
					print(self.token.errorMsg())
					sys.exit(0)

			elif self.ident in tokentype:
				#if cursubtype is empty then the return type is an object
				#used for compiling returns and type checking
				if len(self.curSubType) == 0:
					self.curSubType = self.token.identifier()

				else:
					self.curSubName = self.token.identifier()

			self.token.advance()

		#If this was defined as an argument then the subroutine is not a function or constructor
		#thus we need to set the this pointer in the subroutine to the first argument passed in
		if 'NONE' not in self.table.kindOf('this'):
			self.writer.writePush(self.segment[self.table.kindOf('this')],repr(self.table.indexOf('this')))
			self.writer.writePop('pointer','0')

		#if it is a constructor then we need to allocate memory for the object
		if self.isConstruct:
			self.writer.writePush('constant',repr(self.table.varCount('FIELD')))
			self.writer.writeCall('Memory.alloc',1)
			self.writer.writePop('pointer','0')

		#compile the body of the subroutine
		self.compileStatements()

		self.loopCounter = 0
		self.ifCounter = 0
		self.curSubName = ''

	#------------------------------------------------------------------------------
	# This method compiles the parameter list
	def compileParameterList(self,isConstruct):
		curname = ''
		curtype = ''
		curkind = ''

		#If it isn't a constructor then we need to define this as the
		#first argument
		if not isConstruct:
			self.table.Define('this',self.currClassName,'ARG')

		while self.token.hasMoreTokens():
			tokentype = self.token.tokenType()

			if self.keyword in tokentype:
				tempkey = self.token.keyWord()
				curtype = tempkey

			elif self.ident in tokentype:
				tempident = self.token.identifier()
				
				#if the curtype is empty then its type is an object
				if len(curtype) == 0:
					curtype = tempident
				else:
					curname = tempident

			elif self.sym in tokentype:
				tempsym = self.token.symbol()

				#if it runs into a ) means the end of the parameter list so break
				if ')' in tempsym:
					self.table.Define(curname, curtype, 'ARG')
					break

				#seperation of the parameters
				elif ',' in tempsym:
					self.table.Define(curname, curtype, 'ARG')

					curname = ''
					curtype = ''

				#any other symbol results in a an error
				else:
					print(self.token.errorMsg())
					sys.exit(0)
			
			self.token.advance()
		
		#advance twice because we are at ( so need to getpast that and need to get the next symbol
		self.token.advance()
		self.token.advance()

	#------------------------------------------------------------------------------
	# This method compiles the var decliration
	def compileVarDec(self):
		curname = ''
		curtype = ''

		while self.token.hasMoreTokens():
			tokentype = self.token.tokenType()

			if self.keyword in tokentype:
				tempkey = self.token.keyWord()

				if self.key_var in tempkey:
					s = 'Place holder does nothing just ensures that a var is seen'

				elif self.key_int in tempkey or self.key_char in tempkey or self.key_boolean in tempkey:
					curtype = tempkey

				#if any keyword is docovered than what is above then the vardec is over
				else:
					break

			elif self.ident in tokentype:
				tempident = self.token.identifier()
				
				#if the curtype is empty then its type is an object
				if len(curtype) == 0:
					curtype = tempident
				else:
					curname = tempident

			elif self.sym in tokentype:
				tempsym = self.token.symbol()

				if ',' in tempsym:
					self.table.Define(curname,curtype, 'VAR')
					curname = ''

				#once ; is found then at the end of a vardec
				elif ';' in tempsym:
					self.table.Define(curname,curtype, 'VAR')
					break

			self.token.advance()

	#------------------------------------------------------------------------------
	# This method compiles the statements
	def compileStatements(self):

		while self.token.hasMoreTokens():
			tokentype = self.token.tokenType()

			if self.keyword in tokentype:
				tempkey = self.token.keyWord()

				#if 'let' is found then compilelet
				if self.key_let in tempkey:
					self.compileLet()

				elif self.key_if in tempkey:
					self.compileIf()
					
					#continue because we could have multiple if statements found and
					#the current token could be the key word if so we don't want to advance
					#the tokenizer prematurely
					continue

				elif self.key_while in tempkey:
					self.compileWhile()

				elif self.key_do in tempkey:
					self.compileDo()

				elif self.key_return in tempkey:
					self.compileReturn()

				#incorrect key word at this level of compilation
				else:
					print(self.token.errorMsg())
					sys.exit(0)

			elif self.sym in tokentype:
				tempsym = self.token.symbol()
				#once we run into } thats the endof statments
				if '}' in tempsym:
					break
				#any other symbol discovered at this stage is an error
				else:
					print(self.token.errorMsg())
					sys.exit(0)

			self.token.advance()

	#------------------------------------------------------------------------------
	# This method compiles the do 
	def compileDo(self):
	
		while self.token.hasMoreTokens():
			tokentype = self.token.tokenType()

			if self.keyword in tokentype:
				tempkey = self.token.keyWord()

				if self.key_do in tempkey:
					s = 'Place holder this does nothing'

				#if any keyword other then do is discovered at this level it results in an error
				else:
					print(self.token.errorMsg())
					sys.exit(0)

			elif self.ident in tokentype:
				#compiles the expression with the value for a subroutine call passed in being true
				self.compileExpression(True)
				
				self.token.advance()
				break

			self.token.advance()

		#need to pop the return value of the stack so that it doesn't interfeer
		#with other operations
		self.writer.writePop('temp','0')
		
	#------------------------------------------------------------------------------
	# This method compiles the letStatement
	def compileLet(self):
		"""Compile a let statement: 'let name = expr;' or 'let name[index] = expr;'.

		For a plain variable the value of the right-hand expression is popped
		into the variable's segment. For an array element the target address
		(base + index) is computed first and the value is stored through the
		'that' segment. An undefined variable, an unexpected keyword or an
		unexpected symbol prints an error message and exits.
		"""
		#set once the left-hand side turned out to be an array element
		isArray = False

		#name of the left-hand variable when it is not an array element
		leftSideEq = ''

		while self.token.hasMoreTokens():
			tokentype = self.token.tokenType()

			if self.keyword in tokentype:
				tempkey = self.token.keyWord()

				#the 'let' keyword itself needs no action
				if self.key_let in tempkey:
					s = 'Place holder this does nothing'

				#any other keyword is an error
				else:
					print(self.token.errorMsg())
					sys.exit(0)

			elif self.ident in tokentype:
				tempident = self.token.identifier()
				#look at the following token without consuming it
				peak = self.token.peak()

				#a [ after the identifier means an array element is being assigned
				if '[' in peak:
					#two advances are made before the index expression is compiled
					self.token.advance()
					self.token.advance()

					kind = self.table.kindOf(tempident)

					#a kind of NONE means the variable was never defined
					if "NONE" in kind:
						print(self.token.errorMsg()+"Undefined Variable\n")
						sys.exit(0)

					#push the array's base address onto the stack
					self.writer.writePush(self.segment[kind],repr(self.table.indexOf(tempident)))

					#compile the index expression
					self.compileExpression(False)

					#add the index to the base address
					self.writer.writeArithmetic('+')

					isArray = True
			
					self.token.advance()

					#the advance above replaces the one at the bottom of the loop;
					#continue so the symbol checks below are not run for this token
					continue

				else:
					kind = self.table.kindOf(tempident)

					#a kind of NONE means the variable was never defined
					if "NONE" in kind:
						print(self.token.errorMsg()+"Undefined Variable\n")
						sys.exit(0)

					#remember the left-hand identifier when it is not an array
					leftSideEq = tempident

			elif self.sym in tokentype:
				tempsym = self.token.symbol()

				#an = means the right-hand expression follows
				if '=' in tempsym:
					self.token.advance()

					self.compileExpression(False)

					#storing into an array element
					if isArray:
						#park the expression result in temp 0
						self.writer.writePop('temp','0')

						#the target address computed earlier is now on top: load it into pointer 1
						self.writer.writePop('pointer','1')

						#push the parked value back and store it at that 0
						self.writer.writePush('temp','0')
						self.writer.writePop('that','0')

					#otherwise pop the value into the variable's own location
					else:
						kind = self.table.kindOf(leftSideEq)
						self.writer.writePop(self.segment[kind],repr(self.table.indexOf(leftSideEq)))

					#re-read the current symbol (the check below reads it again itself)
					tempsym = self.token.symbol()
				
				#a ; here is the end of the let statement
				if ';' in self.token.symbol():
					break

				#any other symbol is an error
				else:
					print(self.token.errorMsg())
					sys.exit(0)

			self.token.advance()

	#------------------------------------------------------------------------------
	# This method compiles the whileStatement
	def compileWhile(self):
		#lables for the begenning and the exit of a loop
		curLoop = self.curSubName+'.loop.'+repr(self.loopCounter)
		curLoopExit = curLoop+'.EXIT'

		#incremets loop counter so that all loop for this subroutine will have
		#unique exit and begin label
		self.loopCounter += 1

		while self.token.hasMoreTokens():
			tokentype = self.token.tokenType()

			if self.keyword in tokentype:
				tempkey = self.token.keyWord()

				if self.key_while in tempkey:
					self.writer.writeLabel(curLoop)
				
				#if any other keyword is discovered at this level it is an error
				else:
					print(self.token.errorMsg())
					sys.exit(0)

			elif self.sym in tokentype:
				tempsym = self.token.symbol()

				#the condition of the while loop
				if '(' in tempsym:
					self.token.advance()

					self.compileExpression(False)

					#not the result of the exprssion that if the expression
					#is false we jump the loops exit
					self.writer.writeArithmetic('~')
					self.writer.writeIf(curLoopExit)

				#body of the while loop
				elif '{' in tempsym:
					self.token.advance()

					self.compileStatements()

					#bottom of loop need to go back to the top
					self.writer.writeGoto(curLoop)

					#once the statments are compiled the whilestatment is done
					break

				#any other symbol at this level results in an error
				else:
					print(self.token.errorMsg())
					sys.exit(0)

			self.token.advance()

		self.writer.writeLabel(curLoopExit)

	#------------------------------------------------------------------------------
	# This method compiles the ReturnStatement
	def compileReturn(self):

		while self.token.hasMoreTokens():
			tokentype = self.token.tokenType()

			if self.keyword in tokentype:
				tempkey = self.token.keyWord()

				if self.key_return in tempkey:
					s = "Place holder does nothing"

				#Any other keyword means that an exprssion is to be compiled and return is done
				else:
					self.compileExpression(False)

					self.token.advance()
					break

			#other wise compile expression
			elif self.ident in tokentype or self.string_c in tokentype or self.intc in tokentype:
				self.compileExpression(False)

				self.token.advance()
				break

			elif self.sym in tokentype:
				tempsym = self.token.symbol()

				#denotes the end of a return statment
				if ';' in tempsym:
					#if the current subroutines type is the same as the class
					#then it is a constructor and needs to return the this pointer
					if self.curSubType == self.currClassName:
						self.writer.writePush('pointer','0')

					#if we reach this point and void is not the subroutines type
					#then the user must need to return a value
					elif self.key_void not in self.curSubType:
						print(self.token.errorMsg()+'must return something\n')
						sys.exit(0)

					#if void is the subroutines type return 0
					else:
						self.writer.writePush('constant','0')

					break

				#any other symbol at this level is an error
				else:
					print(self.token.errorMsg())
					sys.exit(0)

			self.token.advance()

		self.writer.writeReturn()
		
	#------------------------------------------------------------------------------
	# This method compiles the ifStatement
	def compileIf(self):
		"""Compile an if statement with an optional else part.

		The condition is negated and followed by a conditional jump to the
		'else' label. When an else part is present, a jump to the exit label is
		written before the 'else' label and the exit label is written at the
		end; otherwise the 'else' label alone closes the statement. Labels are
		built from the current subroutine name and the if counter, which is
		incremented for the next if statement.
		"""
		#labels for the else part and for the exit of the whole if/else statement
		currIf = self.curSubName+'.else.'+repr(self.ifCounter)
		currIfExit = self.curSubName+'.if.'+repr(self.ifCounter)+'.EXIT'
		
		#ensures that later if/else blocks in this subroutine get unique labels
		self.ifCounter += 1

		#set once the 'else' keyword of this statement has been handled
		ifElse = False

		#True until the first body has been compiled; an 'if' keyword seen
		#after that belongs to a separate if statement
		seen_once = True

		while self.token.hasMoreTokens():
			tokentype = self.token.tokenType()

			if self.keyword in tokentype:
				tempkey = self.token.keyWord()

				#the 'if' keyword that opens this statement needs no action
				if self.key_if in tempkey and seen_once:
					s = 'Place holeder does nothing'

				elif self.key_else in tempkey and not ifElse:
					ifElse = True

					#end of the if body: jump over the else part to the exit
					self.writer.writeGoto(currIfExit)

					#the else part starts here
					self.writer.writeLabel(currIf)

				#any other keyword means this if statement is over
				else:
					break

			elif self.sym in tokentype:
				tempsym = self.token.symbol()

				#the condition of the if statement
				if '(' in tempsym:
					self.token.advance()

					self.compileExpression(False)

					#negate the condition so a false condition jumps to the else label
					self.writer.writeArithmetic('~')
					self.writer.writeIf(currIf)

				#the body of the if part or of the else part
				elif '{' in tempsym:
					self.token.advance()

					self.compileStatements()

					seen_once = False

					#the else body was just compiled: the statement is complete
					if ifElse:
						self.token.advance()
						break

				#a } here closes an enclosing block, not this statement
				elif '}' in tempsym:
					break

			self.token.advance()

		#with an else part the exit label closes the statement
		if ifElse:
			self.writer.writeLabel(currIfExit)

		#without one the (unused) else label is the jump target of the condition
		else:
			self.writer.writeLabel(currIf)

	#------------------------------------------------------------------------------
	# This method compiles the expression
	# @param: if this is part of an enclosed statment meanig args to another sub
	# 		  routine
	def compileExpression(self,enclosed):

		while self.token.hasMoreTokens():
			tokentype = self.token.tokenType()

			if self.sym in tokentype:
				tempsym = self.token.symbol()

				#this means that we have term to compile with a potential unary op
				if tempsym in '(~-':
					self.compileTerm(enclosed,True,False,'')

				#signifies the end of an expression
				elif tempsym in ';)],':
					break

			else:
				self.compileTerm(enclosed,False,False,'')

			self.token.advance()

	#------------------------------------------------------------------------------
	# This method compiles the term
	# @param: if argument or array expression
	# @param: if the term contains a unary operator
	# @param: if the method was recursively called
	# @param: the previous sumbol if recursively called
	def compileTerm(self,enclosed,isUnary,callfromTerm,prevSym):
		"""Compile a term and any binary operators that follow it.

		enclosed     -- whether the term is part of an argument or array expression
		isUnary      -- whether a leading '-' may be treated as a unary operator
		callfromTerm -- whether this call was made recursively from compileTerm
		prevSym      -- the operator seen by the calling compileTerm (recursive calls)

		Handles keyword constants, identifiers (plain variables, array elements,
		calls on a variable or class, calls to the current class's own
		subroutines), integer and string constants, parenthesised expressions,
		unary operators and binary operators. The loop ends when the token that
		follows is one of ] ) ; ,

		Returns True when the loop ends normally. Returns False from the binary
		operator branch when the expression ends right after the operator's
		right-hand term; callers use this to decide whether an operator still
		has to be written.
		"""

		while self.token.hasMoreTokens():
			tokentype = self.token.tokenType()

			if self.keyword in tokentype:
				tempkey = self.token.keyWord()

				if self.key_true in tempkey:
					#true is written as constant 1 followed by NEG
					self.writer.writePush('constant','1')
					self.writer.writeArithmetic('NEG')

				elif self.key_false in tempkey:
					self.writer.writePush('constant','0')

				elif self.key_null in tempkey:
					self.writer.writePush('constant','0')

				elif self.key_this in tempkey:
					self.writer.writePush('pointer','0')
					
				
				#any other keyword is an error inside a term
				else:
					print(self.token.errorMsg())
					sys.exit(0)

			elif self.ident in tokentype:
				tempident = self.token.identifier()
				#look at the following token to decide what the identifier is used for
				peaks = self.token.peak()

				#a . means a subroutine call on a variable or on a class
				if '.' in peaks:
					callName = ''
					numArgs = 0
					
					typeof = self.table.typeOf(tempident)

					#a type of NONE means the identifier is not a known variable:
					#it is used directly as the first part of the call name
					if 'NONE' in typeof:
					 	callName = tempident

					else:
					 	#a known variable: call through its declared type
					 	callName = typeof
					 	numArgs += 1
					 	#push the object's value as the first argument
					 	self.writer.writePush(self.segment[self.table.kindOf(tempident)],repr(self.table.indexOf(tempident)))

					self.token.advance()

					#append the current symbol (the . that was peeked) to the call name
					callName += self.token.symbol()

					self.token.advance()

					#the token after the . has to be an identifier
					if self.ident in self.token.tokenType():
						callName += self.token.identifier()

					else:
						print(self.token.errorMsg())
						sys.exit(0)
					
					self.token.advance()

					#the token after the subroutine name has to be a symbol
					if self.sym not in self.token.tokenType():
						print(self.token.errorMsg())
						#NOTE(review): unlike the other error paths this exit passes no status
						sys.exit()

					self.token.advance()

					#compile the argument list; its length is added to the argument count
					numArgs += self.compileExpressionList()

					self.writer.writeCall(callName,numArgs)

				#a ( means a call to one of the current class's own subroutines
				elif '(' in peaks:
					#push the this pointer as the first argument
					self.writer.writePush('pointer','0')
					
					self.token.advance()
					self.token.advance()

					#number of compiled arguments plus 1 for the this pointer pushed above
					numArgs = self.compileExpressionList()+1

					#NOTE(review): numArgs is at least 1 here, so the 'else 1' fallback looks unreachable
					self.writer.writeCall(self.currClassName+'.'+tempident,numArgs if numArgs != 0 else 1)

				#a [ means an array element is being read
				elif '[' in peaks:
					self.token.advance()
					self.token.advance()

					kind = self.table.kindOf(tempident)

					#a kind of NONE means the variable was never defined
					if "NONE" in kind:
						print(self.token.errorMsg()+"Undefined Variable\n")
						sys.exit(0)

					#push the base address of the array
					self.writer.writePush(self.segment[kind],repr(self.table.indexOf(tempident)))

					#compile the index expression
					self.compileExpression(enclosed)

					#add the index to the base address
					self.writer.writeArithmetic('+')

					#load the element address into pointer 1
					self.writer.writePop('pointer','1')
					
					#push the element's value
					self.writer.writePush('that','0')

				#otherwise the identifier is a plain variable
				else:
					kind = self.table.kindOf(tempident)

					if "NONE" in kind:
						print(self.token.errorMsg()+"Undefined Variable\n")
						sys.exit(0)

					self.writer.writePush(self.segment[kind],repr(self.table.indexOf(tempident)))

			elif self.intc in tokentype:
				self.writer.writePush('constant',self.token.intVal())

			elif self.string_c in tokentype:
				string = self.token.stringVal()

				#create a new string with the constant's length as its argument
				self.writer.writePush('constant', repr(len(string)))
				self.writer.writeCall('String.new',1)

				#append the characters one by one, each as its character code
				for c in string:
					self.writer.writePush('constant',repr(ord(c)))
					self.writer.writeCall('String.appendChar',2)

			elif self.sym in tokentype:
				tempsym = self.token.symbol()

				#an expression surrounded by ()
				if '(' in tempsym:
					self.token.advance()

					self.compileExpression(True)

					#from here on the term counts as enclosed
					enclosed = True

				#unary not: compile the operand first, then write the operator
				elif '~' in tempsym:
					self.token.advance()

					self.compileTerm(enclosed,False,False,prevSym)

					self.writer.writeArithmetic(tempsym)

				#unary minus: only when allowed by the caller and not enclosed
				elif '-' in tempsym and isUnary and not enclosed:
					self.token.advance()

					self.compileTerm(enclosed,False,False,prevSym)

					self.writer.writeArithmetic('NEG')

				#binary operator
				elif tempsym in '+-*/&|<>=':
					self.token.advance()

					#in a recursive call the caller's operator is written first, so
					#that it is applied to the values already on the stack
					if callfromTerm:
						if '*' in prevSym:
							self.writer.writeCall('Math.multiply',2)

						elif '/' in prevSym:
							self.writer.writeCall('Math.divide',2)

						else:
							self.writer.writeArithmetic(prevSym)

					#compile the right-hand term, passing this operator along
					what = self.compileTerm(enclosed,False,True,tempsym)

					#the right-hand term returned True and the expression ends here:
					#write this operator now
					if what and self.token.peak() in ']);,':
						if '*' in tempsym:
							self.writer.writeCall('Math.multiply',2)
	
						elif '/' in tempsym:
							self.writer.writeCall('Math.divide',2)
	
						else:
							self.writer.writeArithmetic(tempsym)
						
						#return False so the caller does not write anything more
						#for this part of the expression
						return False

					#the right-hand term returned False and the expression ends here
					elif not what and self.token.peak() in ']);,':
						return False
					
			#a following ] ) ; or , means the term is complete
			if self.token.peak() in ']);,':
				break

			self.token.advance()
		
		return True

	#------------------------------------------------------------------------------
	# This method compiles the expressionList
	def compileExpressionList(self):
		expressCount = 0

		while self.token.hasMoreTokens():
			tokentype = self.token.tokenType()

			if self.sym in tokentype:
				tempsym = self.token.symbol()

				#indicates teh start of another expression
				if ',' in tempsym:
					self.token.advance()

					self.compileExpression(False)

					expressCount += 1

				#indicates that end of expression list
				elif ')' in tempsym:
					break

				else:
					self.compileExpression(False)
					expressCount += 1
			else:
				self.compileExpression(False)
				expressCount += 1

		return expressCount

#-------------------End Class--------------------------------------------------
Ejemplo n.º 6
0
 def __init__(self, symbolTable, tokenizer, filename):
     """Store the collaborators and create the VM writer for filename.

     lines starts as an empty list and label_count starts at 0.
     """
     self.symbolTable, self.tokenizer = symbolTable, tokenizer
     self.writer = VMWriter(filename)
     self.lines = []
     self.label_count = 0
Ejemplo n.º 7
0
class CompileEngine:
    """Recursive-descent compiler for a single Jack class.

    Tokens are pulled from ``tokenizer`` (through its ``tokenType``,
    ``keyWord``, ``identifier``, ``symbol``, ``intVal`` and ``stringVal``
    attributes plus ``hasMoreTokens()`` / ``advance()``), identifiers are
    resolved through ``symbolTable`` and VM commands are emitted through a
    ``VMWriter``. While parsing, an XML-like trace of the parse tree is
    accumulated in ``self.lines``.
    """

    def __init__(self, symbolTable, tokenizer, filename):
        """Store the collaborators and create a ``VMWriter`` for *filename*."""
        self.symbolTable = symbolTable
        self.tokenizer = tokenizer
        self.writer = VMWriter(filename)
        self.lines = []
        self.label_count = 0
        # Filled in by compileClass(); defined here so the attribute exists
        # even before a class has been compiled.
        self.classname = None

    def writeFile(self):
        """Close the VM writer."""
        self.writer.close()

    def writeOpen(self, tag):
        """Append an opening ``<tag>`` line to the parse trace."""
        self.lines.append("<{}>".format(tag))

    def writeClose(self, tag):
        """Append a closing ``</tag>`` line to the parse trace."""
        self.lines.append("</{}>".format(tag))

    def writeTerminal(self, tag, value):
        """Append a ``<tag> value </tag>`` line to the parse trace."""
        self.lines.append("<{}> {} </{}>".format(tag, value, tag))

    def advance(self):
        """Move to the next token, if the tokenizer has one."""
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

    def uniqueLabel(self, label):
        """Return *label* suffixed with a counter that grows on every call."""
        self.label_count += 1
        return label + str(self.label_count)

    def compileClass(self):
        """Compile ``class Name { classVarDec* subroutineDec* }``.

        The class name is remembered in ``self.classname`` for building
        subroutine labels.
        """
        self.advance()
        self.writeOpen("class")
        self.compileItem()  # class
        self.classname = self.tokenizer.identifier
        self.compileItem()  # name
        self.compileItem()  # {

        while self.tokenizer.keyWord in ("STATIC", "FIELD"):
            self.compileClassVarDec()

        while self.tokenizer.keyWord in ("CONSTRUCTOR", "FUNCTION", "METHOD"):
            self.compileSubroutine()

        self.compileItem()  # }
        self.writeClose("class")

    def compileClassVarDec(self):
        """Compile one ``static`` / ``field`` declaration."""
        self.writeOpen("classVarDec")
        self.compileVarDecList()
        self.writeClose("classVarDec")

    def compileVarDecList(self):
        """Compile ``(static|field|var) type name (',' name)* ';'``.

        Each name is defined in the symbol table with the declaration's
        type and kind. Returns the number of names declared.
        """
        kind = self.tokenizer.keyWord
        self.compileItem()  # static/field/var
        # A class type arrives as an identifier, a primitive type as a keyword.
        thetype = self.tokenizer.identifier or self.tokenizer.keyWord
        self.compileItem()  # type
        count = 0
        while self.tokenizer.symbol != ";":
            count += 1
            self.compileNewIdentifier(thetype, kind)  # name
            if self.tokenizer.symbol == ",":
                self.compileItem()  # ,
        self.compileItem()  # ;
        return count

    def compileSubroutine(self):
        """Compile one constructor, function or method declaration.

        The ``function`` command is written only after the ``var``
        declarations have been read, because it needs the local count.
        Methods then copy argument 0 into pointer 0; constructors allocate
        ``varCount("FIELD")`` words with ``Memory.alloc`` and store the
        result in pointer 0.
        """
        self.writeOpen("subroutineDec")
        funtype = self.tokenizer.keyWord
        self.symbolTable.startSubroutine(funtype == "METHOD")
        self.compileItem()  # constructor/function/method
        self.compileItem()  # return type
        label = "{}.{}".format(self.classname, self.tokenizer.identifier)
        self.compileItem()  # name
        self.compileItem()  # (
        self.compileParameterList()
        self.compileItem()  # )
        self.writeOpen("subroutineBody")
        self.compileItem()  # {
        while self.tokenizer.keyWord == "VAR":
            self.compileVarDec()

        nLocals = self.symbolTable.varCount("VAR")
        self.writer.writeFunction(label, nLocals)
        if funtype == "METHOD":
            self.writer.writePush("ARG", 0)
            self.writer.writePop("POINTER", 0)
        elif funtype == "CONSTRUCTOR":
            self.writer.writePush("CONST", self.symbolTable.varCount("FIELD"))
            self.writer.writeCall("Memory.alloc", 1)
            self.writer.writePop("POINTER", 0)
        self.compileStatements()
        self.compileItem()  # }
        self.writeClose("subroutineBody")
        self.writeClose("subroutineDec")

    def compileParameterList(self):
        """Compile a possibly empty ``type name (',' type name)*`` list.

        Every parameter is defined in the symbol table with kind ``ARG``.
        The closing ``)`` is left for the caller.
        """
        self.writeOpen("parameterList")
        kind = "ARG"
        while self.tokenizer.symbol != ")":
            thetype = self.tokenizer.identifier or self.tokenizer.keyWord
            self.compileItem()  # type
            self.compileNewIdentifier(thetype, kind)  # name
            if self.tokenizer.symbol == ",":
                self.compileItem()  # ,

        self.writeClose("parameterList")

    def compileVarDec(self):
        """Compile one ``var`` declaration; return how many names it declared."""
        self.writeOpen("varDec")
        count = self.compileVarDecList()
        self.writeClose("varDec")
        return count

    def compileStatements(self):
        """Compile statements until the current token is ``}``.

        Raises:
            Exception: if the current token does not start a statement.
        """
        self.writeOpen("statements")
        while self.tokenizer.symbol != "}":
            keyword = self.tokenizer.keyWord
            if keyword == "DO":
                self.compileDo()
            elif keyword == "IF":
                self.compileIf()
            elif keyword == "LET":
                self.compileLet()
            elif keyword == "RETURN":
                self.compileReturn()
            elif keyword == "WHILE":
                self.compileWhile()
            else:
                raise Exception(
                    "expected a statement but found keyword {!r}".format(keyword))
        self.writeClose("statements")

    def _resolveCallTarget(self, name):
        """Turn a source-level call name into a VM label, pushing the receiver.

        Returns ``(label, implicitArgs)`` where ``implicitArgs`` is 1 when a
        receiver object was pushed as the hidden first argument and 0
        otherwise.
        """
        receiver, dot, subroutine = name.partition(".")
        if not dot:
            # Bare ``foo(...)``: a method of the current object.
            self.writer.writePush("POINTER", 0)
            return self.classname + "." + name, 1

        objsym = self.symbolTable.getSymbol(receiver)
        if objsym is not None:
            # ``var.foo(...)``: method call on the object held in ``var``.
            # The label is rebuilt from the parts; a textual replace of the
            # variable name would also hit matches inside the subroutine name.
            self.writer.writePush(objsym.kind, objsym.index)
            return objsym.thetype + "." + subroutine, 1

        # ``Class.foo(...)``: no receiver to push.
        return name, 0

    def compileDo(self):
        """Compile ``do subroutineCall ;`` and discard the returned value."""
        self.writeOpen("doStatement")
        self.compileItem()  # do
        name = ""
        while self.tokenizer.symbol != "(":
            name += self.compileItem()[0]  # Main . method
        label, argCount = self._resolveCallTarget(name)

        self.compileItem()  # (
        argCount += self.compileExpressionList()
        self.compileItem()  # )
        self.compileItem()  # ;
        self.writer.writeCall(label, argCount)
        # The call's result is not used by a do-statement.
        self.writer.writePop("TEMP", 0)
        self.writeClose("doStatement")

    def compileLet(self):
        """Compile ``let name ('[' expression ']')? = expression ;``.

        For an array target the element address (index + base) is computed
        before the right-hand side; afterwards the value is parked in
        temp 0 while pointer 1 is set, then stored through ``THAT 0``.
        """
        self.writeOpen("letStatement")
        self.compileItem()  # let
        is_array = False
        if self.tokenizer.symbol != "=":
            value, sym = self.compileItem()  # a
            if self.tokenizer.symbol == "[":
                is_array = True
                self.compileItem()  # [
                self.compileExpression()  # 0
                self.writer.writePush(sym.kind, sym.index)
                self.writer.writeArithmetic("ADD")
                self.compileItem()  # ]
        self.compileItem()  # =
        self.compileExpression()
        self.compileItem()  # ;
        if is_array:
            self.writer.writePop("TEMP", 0)
            self.writer.writePop("POINTER", 1)
            self.writer.writePush("TEMP", 0)
            self.writer.writePop("THAT", 0)
        else:
            self.writer.writePop(sym.kind, sym.index)
        self.writeClose("letStatement")

    def compileWhile(self):
        """Compile ``while ( expression ) { statements }``.

        The condition is negated so that the conditional jump leaves the
        loop; the body ends with a jump back to the start label.
        """
        self.writeOpen("whileStatement")
        start = self.uniqueLabel("LOOPSTART")
        end = self.uniqueLabel("LOOPEND")
        self.writer.writeLabel(start)
        self.compileItem()  # while
        self.compileItem()  # (
        self.compileExpression()
        self.writer.writeArithmetic("NOT")
        self.writer.writeIf(end)
        self.compileItem()  # )
        self.compileItem()  # {
        self.compileStatements()
        self.compileItem()  # }
        self.writer.writeGoto(start)
        self.writer.writeLabel(end)
        self.writeClose("whileStatement")

    def compileReturn(self):
        """Compile ``return expression? ;``.

        A bare ``return;`` pushes constant 0 first so that a value is always
        on the stack for the caller (``compileDo`` pops one after every
        call).
        """
        self.writeOpen("returnStatement")
        self.compileItem()  # return
        if self.tokenizer.symbol != ";":
            self.compileExpression()
        else:
            self.writer.writePush("CONST", 0)
        self.compileItem()  # ;
        self.writer.writeReturn()
        self.writeClose("returnStatement")

    def compileIf(self):
        """Compile ``if ( expression ) { statements } (else { statements })?``.

        Three labels are used: the true branch, the false branch (which is
        also where an optional ``else`` body goes) and the end.
        """
        self.writeOpen("ifStatement")
        self.compileItem()  # if
        self.compileItem()  # (
        self.compileExpression()
        iftrue = self.uniqueLabel("IFTRUE")
        iffalse = self.uniqueLabel("IFFALSE")
        ifend = self.uniqueLabel("IFEND")
        self.writer.writeIf(iftrue)
        self.writer.writeGoto(iffalse)
        self.writer.writeLabel(iftrue)
        self.compileItem()  # )
        self.compileItem()  # {
        self.compileStatements()
        self.compileItem()  # }
        self.writer.writeGoto(ifend)
        self.writer.writeLabel(iffalse)
        if self.tokenizer.keyWord == "ELSE":
            self.compileItem()  # else
            self.compileItem()  # {
            self.compileStatements()
            self.compileItem()  # }
        self.writer.writeLabel(ifend)
        self.writeClose("ifStatement")

    def compileExpression(self):
        """Compile ``term (op term)*``, emitting each operator after its operands.

        ``*`` and ``/`` are emitted as calls (the target comes from ``OPS``);
        every other operator is an arithmetic command.
        """
        self.writeOpen("expression")
        self.compileTerm()
        while self.tokenizer.symbol in OPS:
            op, _ = self.compileItem()  # & | + etc
            self.compileTerm()
            if op in ("*", "/"):
                self.writer.writeCall(OPS[op], 2)
            else:
                self.writer.writeArithmetic(OPS[op])
        self.writeClose("expression")

    def compileNewIdentifier(self, thetype, kind):
        """Define the current identifier in the symbol table and advance."""
        sym = self.symbolTable.define(self.tokenizer.identifier, thetype, kind)
        self.writeTerminal(
            "identifier", "{} DEFINE {} {} {}".format(sym.kind, sym.thetype,
                                                      sym.name, sym.index))
        self.advance()

    def compileItem(self):
        """Consume the current token and record it in the parse trace.

        Returns:
            ``(value, sym)``: the token's value, and its symbol-table entry
            when the token is an identifier known to the table (otherwise
            ``None``).
        """
        sym = None
        ret = None
        tokenType = self.tokenizer.tokenType
        if tokenType == "KEYWORD":
            ret = self.tokenizer.keyWord
            self.writeTerminal("keyword", self.tokenizer.keyWord.lower())
        elif tokenType == "IDENTIFIER":
            ret = self.tokenizer.identifier
            sym = self.symbolTable.getSymbol(ret)
            if sym:
                self.writeTerminal(
                    "identifier",
                    "{} EXISTING {} {} {}".format(sym.kind, sym.thetype,
                                                  sym.name, sym.index))
            else:
                # class or subroutine
                self.writeTerminal("identifier", "CLASS/SUBROUTINE " + ret)
        elif tokenType == "SYMBOL":
            ret = self.tokenizer.symbol
            self.writeTerminal(
                "symbol",
                self.tokenizer.symbol.replace("&", "&amp;").replace(
                    "<", "&lt;").replace(">", "&gt;"))
        elif tokenType == "INT_CONST":
            ret = self.tokenizer.intVal
            self.writeTerminal("integerConstant", self.tokenizer.intVal)
        elif tokenType == "STRING_CONST":
            ret = self.tokenizer.stringVal
            self.writeTerminal("stringConstant", self.tokenizer.stringVal)
        self.advance()
        return ret, sym

    def compileTerm(self):
        """Compile a single term and emit the code that leaves its value on the stack.

        Handles parenthesised expressions, unary operators (via
        ``UNARY_OPS``), array access, subroutine calls, integer / string /
        keyword constants and plain variables.

        Raises:
            Exception: if a keyword other than TRUE, FALSE, NULL or THIS is
                used as a term.
        """
        self.writeOpen("term")
        if self.tokenizer.symbol == "(":
            self.compileItem()  # (
            self.compileExpression()
            self.compileItem()  # )
        elif self.tokenizer.symbol in UNARY_OPS:
            unary_op, _ = self.compileItem()  # - ~
            self.compileTerm()
            self.writer.writeArithmetic(UNARY_OPS[unary_op])
        else:
            tokenType = self.tokenizer.tokenType
            value, sym = self.compileItem()  # any value
            if self.tokenizer.symbol == ".":
                value += self.compileItem()[0]  # .
                value += self.compileItem()[0]  # subroutineName
            if self.tokenizer.symbol == "[":
                self.writer.writePush(sym.kind, sym.index)
                self.compileItem()  # [
                self.compileExpression()
                self.writer.writeArithmetic("ADD")
                self.writer.writePop("POINTER", 1)
                self.writer.writePush("THAT", 0)
                self.compileItem()  # ]
            elif self.tokenizer.symbol == "(":
                label, nArgs = self._resolveCallTarget(value)
                self.compileItem()  # (
                nArgs += self.compileExpressionList()
                self.writer.writeCall(label, nArgs)
                self.compileItem()  # )
            elif tokenType == "INT_CONST":
                self.writer.writePush("CONST", value)
            elif tokenType == "STRING_CONST":
                self.writer.writePush("CONST", len(value))
                self.writer.writeCall("String.new", 1)
                for char in value:
                    # Keep one copy of the string on the stack across the
                    # append call by bouncing it through temp 1.
                    self.writer.writePop("TEMP", 1)
                    self.writer.writePush("TEMP", 1)
                    self.writer.writePush("TEMP", 1)
                    self.writer.writePush("CONST", ord(char))
                    self.writer.writeCall("String.appendChar", 2)
                    self.writer.writePop("TEMP", 0)
            elif tokenType == "KEYWORD":
                if value == "TRUE":
                    self.writer.writePush("CONST", 1)
                    self.writer.writeArithmetic("NEG")
                elif value == "THIS":
                    self.writer.writePush("POINTER", 0)
                elif value in ("FALSE", "NULL"):
                    self.writer.writePush("CONST", 0)
                else:
                    raise Exception(
                        "keyword {!r} cannot be used as a term".format(value))
            else:
                self.writer.writePush(sym.kind, sym.index)

        self.writeClose("term")

    def compileExpressionList(self):
        """Compile expressions up to the closing ``)``; return how many there were.

        The ``)`` itself is left for the caller to consume.
        """
        self.writeOpen("expressionList")
        count = 0
        while self.tokenizer.symbol != ")":
            count += 1
            self.compileExpression()
            if self.tokenizer.symbol == ",":
                self.compileItem()  # ,
        self.writeClose("expressionList")
        return count
Ejemplo n.º 8
0
class CompilationEngine():
    '''
    Parses a stream of jack tokens recursively.
    '''

    def __init__(self, tokenizer):
        """Compile the single class supplied by *tokenizer* into a ``.vm`` file.

        The output path is the tokenizer's filename with a trailing
        ``.jack`` replaced by ``.vm``. Compilation starts immediately, and
        the writer is closed afterwards even if compilation raises.
        """
        source_name = tokenizer.get_filename()
        # Drop only a trailing '.jack'; str.replace would also remove a
        # '.jack' that appears earlier in the path.
        if source_name.endswith('.jack'):
            source_name = source_name[:-len('.jack')]
        self._name = source_name
        # tokenizer for input
        self._tokenizer = tokenizer
        # symbol table
        self._symbols = SymbolTable()
        # vm output file
        self._writer = VMWriter(self._name + '.vm')
        self._class = None
        self._subroutine = None
        self._counter = 0
        try:
            # Input should be a tokenized .jack file containing one class
            assert self._tokenizer.has_more_tokens()
            self._tokenizer.advance()
            self.compile_class()
        finally:
            # never leave the output file open, even on a failed compile
            self.close()


    def change_name(self, name):
        """Replace the stored base name with *name*."""
        # Only the attribute changes; the writer created in __init__ is not touched here.
        self._name = name

    def get_name(self, name=None):
        """Return the stored base name.

        *name* is ignored. It stays in the signature (now optional) so that
        existing callers which pass an argument keep working.
        """
        return self._name

    def get_token(self):
        """Return the tokenizer's ``_token`` attribute (used in assertion messages)."""
        return self._tokenizer._token

    def get_type(self):
        """Return the tokenizer's ``_type`` attribute."""
        return self._tokenizer._type

    def close(self):
        """Close the VM writer; called from ``__init__`` once compilation is done."""
        # close the output file at the end
        self._writer.close()

    def compile_class(self):
        """Compile ``'class' className '{' classVarDec* subroutineDec* '}'``.

        The class name is stored in ``self._class``. After the closing brace
        the tokenizer must report that no tokens remain.
        """
        tokens = self._tokenizer

        # 'class' keyword
        assert tokens.keyword() == 'class'
        tokens.advance()

        # className
        assert tokens.identifier()
        self._class = tokens.identifier()
        tokens.advance()

        # opening brace
        assert tokens.symbol() == '{', "expected '{' but got " + self.get_token()
        tokens.advance()

        # zero or more class-level variable declarations
        while tokens.is_valid_class_variable():
            self.compile_class_var()

        # zero or more subroutine declarations
        while tokens.is_valid_subroutine():
            self.compile_subroutine()

        # closing brace
        assert tokens.symbol() == '}', "expected '}' but got " + self.get_token()
        tokens.advance()

        # a well-formed .jack file holds exactly one class, so nothing may follow
        assert not tokens.has_more_tokens()


    def compile_class_var(self):
        """Compile ``('static'|'field') type varName (',' varName)* ';'``.

        Every declared name is defined in the symbol table with the shared
        type and kind. No VM code is written.
        """
        tokens = self._tokenizer
        assert tokens.is_valid_class_variable()

        # 'static' or 'field'
        declared_kind = tokens.get_token()
        tokens.advance()

        # 'int', 'char', 'boolean' or a class name
        assert tokens.is_valid_type()
        declared_type = tokens.get_token()
        tokens.advance()

        # one name, then optionally more separated by commas
        while True:
            assert tokens.identifier()
            self._symbols.define(tokens.get_token(), declared_type, declared_kind)
            tokens.advance()
            if tokens.symbol() != ',':
                break
            # step over the ',' to reach the next name
            tokens.advance()

        # the declaration must end with ';'
        assert tokens.symbol() == ';', "expected ';' but got " + self.get_token()
        tokens.advance()


    def compile_subroutine(self):
        """Compile one subroutine declaration.

        Grammar: ``('constructor'|'method'|'function') ('void'|type)
        subroutineName '(' parameterList ')' subroutineBody``.

        The subroutine kind is kept in ``self._subroutine``; the body is
        compiled under the name ``<class>.<subroutineName>`` and a trailing
        comment is written once it is done.
        """
        tokens = self._tokenizer
        assert tokens.is_valid_subroutine()
        # tell the symbol table that a new subroutine begins
        self._symbols.start_subroutine()

        # constructor, method or function
        self._subroutine = tokens.get_token()
        if self._subroutine == 'method':
            # a method receives the object itself as its first argument
            self._symbols.define('this', self._class, 'argument')
        tokens.advance()

        # return type ('void' or a type)
        assert tokens.is_valid_subroutine_type()
        tokens.advance()

        # subroutineName
        assert tokens.identifier()
        short_name = tokens.identifier()
        tokens.advance()

        # '(' parameterList ')'
        assert tokens.symbol() == '('
        tokens.advance()
        if tokens.is_valid_type():
            self.compile_parameter_list()
        assert tokens.symbol() == ')'
        tokens.advance()

        qualified_name = '.'.join((self._class, short_name))

        # the body starts at '{'
        assert tokens.symbol() == '{'
        self.compile_subroutine_body(qualified_name)
        self._writer.write_comment('end subroutine ' + qualified_name)


    def compile_parameter_list(self):
        """Compile a non-empty ``type varName (',' type varName)*`` list.

        Only called when the list has at least one parameter. Each parameter
        is defined in the symbol table with kind ``'argument'``. The closing
        ``')'`` is checked but left for the caller to consume.
        """
        tokens = self._tokenizer
        while True:
            # type - int, char, boolean or a class name
            assert tokens.is_valid_type()
            param_type = tokens.get_token()
            tokens.advance()

            # varName
            assert tokens.identifier()
            param_name = tokens.get_token()
            self._symbols.define(param_name, param_type, 'argument')
            tokens.advance()

            if tokens.symbol() != ',':
                break
            # step over the ',' to reach the next parameter
            tokens.advance()

        # the list must be followed by ')'
        assert tokens.symbol() == ')'


    def compile_subroutine_body(self, name):
        """Compile ``'{' varDec* statements '}'`` for the subroutine *name*.

        The function header is written only after all ``var`` declarations
        have been read, since it needs the number of locals. A method then
        copies argument 0 into pointer 0; a constructor asks the writer to
        allocate as many words as the class has fields.
        """
        tokens = self._tokenizer

        # opening brace
        assert tokens.symbol() == '{'
        tokens.advance()

        # var declarations write no VM code, they only count locals
        local_total = 0
        while tokens.keyword() == 'var':
            local_total += self.compile_var()
        self._writer.write_function(name, local_total)

        routine_kind = self._subroutine
        if routine_kind == 'constructor':
            # allocate the object
            self._writer.write_object_alloc(self._symbols.var_count('field'))
        elif routine_kind == 'method':
            # point 'this' at the receiver passed as argument 0
            self._writer.write_push('argument', 0)
            self._writer.write_pop('pointer', 0)

        self.compile_statements()

        # closing brace
        assert tokens.symbol() == '}'
        tokens.advance()


    def compile_var(self):
        """Compile ``'var' type varName (',' varName)* ';'``.

        Each name is defined in the symbol table with kind ``'local'``.

        Returns:
            The number of local variables declared by this statement.
        """
        tokens = self._tokenizer
        assert tokens.is_valid_variable()
        # step over 'var'
        tokens.advance()

        # type - int, char, boolean or a class name
        assert tokens.is_valid_type()
        local_type = tokens.get_token()
        tokens.advance()

        declared = 0
        while True:
            # varName
            assert tokens.identifier()
            self._symbols.define(tokens.get_token(), local_type, 'local')
            declared += 1
            tokens.advance()
            if tokens.symbol() != ',':
                break
            # step over the ',' to reach the next name
            tokens.advance()

        # terminating ';'
        assert tokens.symbol() == ';'
        tokens.advance()
        return declared


    def compile_statements(self):
        """Compile ``statement*`` and check that a ``'}'`` follows.

        Statements are dispatched on their leading keyword for as long as
        the tokenizer reports a valid statement. The closing brace is
        asserted but left for the caller to consume.
        """
        # leading keyword -> name of the method that compiles that statement
        handler_names = {
            'let': 'compile_let',
            'if': 'compile_if',
            'while': 'compile_while',
            'do': 'compile_do',
            'return': 'compile_return',
        }
        tokens = self._tokenizer
        while tokens.is_valid_statement():
            handler_name = handler_names.get(tokens.keyword())
            if handler_name is not None:
                getattr(self, handler_name)()
        # symbol - '}'
        assert tokens.symbol() == '}'

    def compile_let(self):
        """Compile ``'let' varName ('[' expression ']')? '=' expression ';'``.

        A variable of kind ``'field'`` is addressed through the ``'this'``
        segment in both forms; any other kind is used as the segment name
        directly.

        For an array target the element address (base + index) is computed
        first; after the right-hand side is evaluated its value is parked in
        ``temp 1`` while ``pointer 1`` is set, then stored through
        ``that 0``.
        """
        tokens = self._tokenizer
        # keyword - 'let'
        assert tokens.keyword() == 'let'
        tokens.advance()
        # identifier - varName
        assert tokens.identifier()
        following = tokens.peek()
        if following == '=':
            # varName '=' expression ';'
            var_name = tokens.identifier()
            var_kind = self._symbols.kind_of(var_name)
            var_index = self._symbols.index_of(var_name)
            segment = 'this' if var_kind == 'field' else var_kind
            tokens.advance()
            # next token is '='
            tokens.advance()
            # evaluate RHS expression, pop into variable
            self.compile_expression()
            self._writer.write_pop(segment, var_index)
            # expression ends with a ';'
            assert tokens.symbol() == ';', "expected ';' but got " + self.get_token()
            tokens.advance()
        elif following == '[':
            # varName '[' expression ']' '=' expression ';'
            var_name = tokens.identifier()
            var_kind = self._symbols.kind_of(var_name)
            var_index = self._symbols.index_of(var_name)
            # Same field -> 'this' mapping as the scalar branch, so an array
            # stored in a field is read from the right segment.
            segment = 'this' if var_kind == 'field' else var_kind
            # write base address to stack
            self._writer.write_push(segment, var_index)
            tokens.advance()
            # symbol - '['
            tokens.advance()
            # expression - represents array index
            self.compile_expression()
            # base address + array index
            self._writer.write_arithmetic('add')
            # symbol - ']'
            assert tokens.symbol() == ']'
            tokens.advance()
            # symbol - '='
            assert tokens.symbol() == '='
            tokens.advance()
            # expression
            self.compile_expression()
            # pop RHS value into temp segment
            self._writer.write_pop('temp', 1)
            # align that with array[i]
            self._writer.write_pop('pointer', 1)
            # push value of RHS expression onto stack
            self._writer.write_push('temp', 1)
            # pop value into correct array index
            self._writer.write_pop('that', 0)
            # symbol - ';'
            assert tokens.symbol() == ';', "expected ';' but got " + self.get_token()
            tokens.advance()


    def compile_if(self):
        """Compile an if statement with an optional else branch.

        Grammar: ``'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?``.

        The condition is negated and ``write_if('ELSE<n>')`` skips the true
        branch; the true branch ends with ``write_goto('IF<n>')``.
        ``ELSE<n>`` labels the start of the optional else branch and
        ``IF<n>`` the end of the whole statement, where ``<n>`` comes from
        ``self._counter``.
        """
        tokens = self._tokenizer
        # keyword - if
        assert tokens.keyword() == 'if'
        self._writer.write_comment('if statement')
        tokens.advance()
        # symbol - (
        assert tokens.symbol() == '(', "expected '(' but got " + self.get_token()
        tokens.advance()
        # expression
        self.compile_expression()
        self._writer.write_arithmetic('not')
        label_num = str(self._counter)
        self._counter += 1
        self._writer.write_if('ELSE' + label_num)
        # symbol - ) ; the message names the token that is actually expected
        assert tokens.symbol() == ')', "expected ')' but got " + self.get_token()
        tokens.advance()
        # symbol - '{'
        assert tokens.symbol() == '{'
        tokens.advance()
        # statements
        self.compile_statements()
        # symbol - '}'
        assert tokens.symbol() == '}'
        tokens.advance()
        self._writer.write_goto('IF' + label_num)
        self._writer.write_label('ELSE' + label_num)
        # check for else
        if tokens.keyword() == 'else':
            # 'else' '{' statements '}'
            # keyword - 'else'
            tokens.advance()
            # symbol - '{'
            assert tokens.symbol() == '{', "expected '{' but got " + self.get_token()
            tokens.advance()
            # statements
            self.compile_statements()
            # symbol - '}'
            assert tokens.symbol() == '}', "expected '}' but got " + self.get_token()
            tokens.advance()
        self._writer.write_label('IF' + label_num)


    def compile_while(self):
        """Compile ``'while' '(' expression ')' '{' statements '}'``.

        ``WHILE<n>`` labels the condition and ``ELSE<n>`` the loop exit,
        with ``<n>`` taken from ``self._counter``. The condition is negated
        so that the conditional jump leaves the loop.
        """
        tokens = self._tokenizer
        assert tokens.keyword() == 'while'

        # reserve a label number for this loop
        suffix = str(self._counter)
        self._counter += 1
        loop_label = 'WHILE' + suffix
        exit_label = 'ELSE' + suffix

        # 'while' '('
        tokens.advance()
        assert tokens.symbol() == '('
        tokens.advance()

        # condition: re-evaluated on every pass, jump out when it fails
        self._writer.write_label(loop_label)
        self.compile_expression()
        self._writer.write_arithmetic('not')
        self._writer.write_if(exit_label)

        # ')' '{'
        assert tokens.symbol() == ')'
        tokens.advance()
        assert tokens.symbol() == '{'
        tokens.advance()

        # body, then back to the condition
        self.compile_statements()
        self._writer.write_goto(loop_label)
        self._writer.write_label(exit_label)

        # '}'
        assert tokens.symbol() == '}'
        tokens.advance()

    def compile_do(self):
        """Compile ``'do' subroutineCall ';'``.

        The value left on the stack by the call is popped into ``temp 0``,
        since a do-statement does not use it.
        """
        tokens = self._tokenizer
        # 'do'
        assert tokens.keyword() == 'do'
        tokens.advance()

        # the call itself must start with an identifier
        assert tokens.identifier()
        self.compile_subroutine_call()

        # ';'
        assert tokens.symbol() == ';'
        # throw away the call's return value
        self._writer.write_pop('temp', 0)
        tokens.advance()



    def compile_return(self):
        """Compile ``'return' expression? ';'``.

        A bare ``return;`` pushes ``constant 0`` as the return value;
        otherwise the expression is compiled. Either way the return command
        is written last.
        """
        tokens = self._tokenizer
        # 'return'
        assert tokens.keyword() == 'return'
        self._writer.write_comment('return statement')
        tokens.advance()

        if tokens.symbol() == ';':
            # no expression: return the default value 0
            self._writer.write_push('constant', 0)
        else:
            # the expression's value becomes the return value
            self.compile_expression()
            assert tokens.symbol() == ';'

        # consume ';' and emit the return
        tokens.advance()
        self._writer.write_return()


    def compile_expression(self):
        """Compile ``term (op term)*``.

        Operators are written after both of their operands (postfix order)
        and are applied strictly left to right.
        """
        tokens = self._tokenizer
        # leading term
        self.compile_term()
        # each further "op term" pair
        while tokens.is_valid_operator():
            operator = tokens.symbol()
            tokens.advance()
            self.compile_term()
            # operands are on the stack now, so the operator can be emitted
            self._writer.write_operator(operator)


    def compile_term(self):
        """Compile one term and emit the code that leaves its value on the stack.

        Grammar: ``integerConstant | stringConstant | keywordConstant |
        varName | varName '[' expression ']' | subroutineCall |
        '(' expression ')' | unaryOp term``.

        A variable of kind ``'field'`` is read through the ``'this'``
        segment, both as a plain variable and as the base of an array
        access.

        Raises:
            AssertionError: if the current token starts none of the forms
                above.
        """
        tokens = self._tokenizer
        if tokens.int_value() is not None:
            # integerConstant
            self._writer.write_push('constant', tokens.int_value())
            tokens.advance()
        elif tokens.string_value() is not None:
            # stringConstant
            self._writer.write_string_constant(tokens.string_value())
            tokens.advance()
        elif tokens.keyword() is not None:
            # keywordConstant
            self._writer.write_keyword_constant(tokens.keyword())
            tokens.advance()
        elif tokens.symbol() == '(':
            # '(' expression ')'
            tokens.advance()
            self.compile_expression()
            assert tokens.symbol() == ')'
            tokens.advance()
        elif tokens.is_valid_unary():
            # unaryOp term
            temp_op = tokens.symbol()
            tokens.advance()
            # term
            self.compile_term()
            # write operator vm command, postfix order
            self._writer.write_unary(temp_op)
        elif tokens.identifier() and tokens.peek() == '[':
            # varName '[' expression ']'
            array_name = tokens.identifier()
            array_kind = self._symbols.kind_of(array_name)
            # Same field -> 'this' mapping as the plain-variable branch
            # below, so an array stored in a field is read correctly.
            segment = 'this' if array_kind == 'field' else array_kind
            # push the array's base address
            self._writer.write_push(segment, self._symbols.index_of(array_name))
            tokens.advance()
            # process [ symbol
            tokens.advance()
            # expects expression, value is pushed onto the stack
            self.compile_expression()
            # setup pointer to array element
            self._writer.write_operator('+')
            self._writer.write_pop('pointer', 1)
            # push array value onto stack
            self._writer.write_push('that', 0)
            # expects closing square bracket
            assert tokens.symbol() == ']'
            tokens.advance()
        elif tokens.identifier() and tokens.peek() in ['(', '.']:
            # subroutineCall
            self.compile_subroutine_call()
        elif self._symbols.exists(tokens.identifier()):
            # varName
            var_name = tokens.identifier()
            var_kind = self._symbols.kind_of(var_name)
            var_index = self._symbols.index_of(var_name)
            if var_kind == 'field':
                # push field var onto stack
                self._writer.write_push('this', var_index)
            else:
                self._writer.write_push(var_kind, var_index)
            tokens.advance()
        else:
            assert False, "unknown token: " + self.get_token() + " with type " + self.get_type()

    def compile_subroutine_call(self):
        """Compile one subroutine call and emit its VM ``call`` command.

        Grammar handled::

            subroutineName '(' expressionList ')'
            | (className | varName) '.' subroutineName '(' expressionList ')'

        Three shapes are distinguished:

        * ``name(...)`` -- treated as a method of the class being compiled;
          ``pointer 0`` is pushed as the implicit first argument.
        * ``var.name(...)`` where ``var`` is in the symbol table -- the
          variable is pushed as the implicit first argument (``field``
          variables are read through the ``this`` segment) and the call
          targets the variable's declared type.
        * ``Name.name(...)`` otherwise -- called with only the explicit
          arguments.

        On entry the tokenizer must be on the leading identifier; on return
        it is positioned just past the closing ``')'``.

        Raises:
            AssertionError: if the tokens do not match the grammar above
                (the file uses ``assert`` for its syntax checks).
        """
        tokenizer = self._tokenizer
        leading = tokenizer.identifier()
        assert leading and tokenizer.peek() in ['(', '.']

        if tokenizer.peek() == '(':
            # subroutineName '(' ... : method of the current class
            callee = self._class + '.' + leading
            tokenizer.advance()
            # 'this' is the implicit first argument
            self._writer.write_push('pointer', 0)
            implicit_args = 1
        else:
            if self._symbols.exists(leading):
                # varName '.' ... : the object itself is the implicit argument
                kind = self._symbols.kind_of(leading)
                segment = 'this' if kind == 'field' else kind
                self._writer.write_push(segment,
                                        self._symbols.index_of(leading))
                # the call is resolved against the variable's declared type
                qualifier = self._symbols.type_of(leading)
                implicit_args = 1
            else:
                # className '.' ... : no implicit argument
                qualifier = leading
                implicit_args = 0
            tokenizer.advance()
            # symbol - '.'
            separator = tokenizer.get_token()
            tokenizer.advance()
            # subroutineName
            member = tokenizer.identifier()
            assert member, (
                "expected subroutine name after '.', got: {!r}".format(
                    tokenizer.get_token()))
            callee = qualifier + separator + member
            tokenizer.advance()
            assert tokenizer.symbol() == '('

        # symbol - '('
        tokenizer.advance()
        # expressionList: each expression leaves one value on the stack
        total_args = implicit_args + self.compile_expression_list()
        self._writer.write_call(callee, total_args)
        # symbol - ')'
        assert tokenizer.symbol() == ')'
        tokenizer.advance()

    def compile_expression_list(self):
        """Compile a possibly empty, comma-separated list of expressions.

        Grammar: ``(expression (',' expression)*)?``

        The list is considered empty when the current token is ``')'``.
        Otherwise one expression is compiled, and another one after every
        ``','``. Parsing stops at the first token following an expression
        that is not a comma, so a missing separator is left for the caller's
        closing-parenthesis check to report instead of being silently
        counted as an extra argument.

        Returns:
            int: the number of expressions compiled.
        """
        if self._tokenizer.symbol() == ')':
            # empty argument list
            return 0
        self.compile_expression()
        expression_count = 1
        while self._tokenizer.symbol() == ',':
            # a comma must be followed by another expression
            self._tokenizer.advance()
            self.compile_expression()
            expression_count += 1
        return expression_count