Ejemplo n.º 1
0
class CompilationEngine(object):
   # the destination file for writing
   destination_file = None

   # the tokenizer for the input file
   tokenizer = None

   # symbol table
   symbol_table = None

   # vm writer
   vm_writer = None

   # the class name
   class_name = ""

   # indices used to build unique labels for if and while statements
   # start at -1 because we increment before use
   while_index = -1
   if_index = -1

   # the constructor for compiling a single class
   # the next method to be called after construction must be compile_class
   # source_filename must be a single file, not a directory
   def __init__(self, source_filename):
      """Set up the compiler for one Jack source file.

      The output path is derived from source_filename: a trailing ".jack"
      (matched case-insensitively) is replaced with ".vm"; any other name
      gets ".vm" appended. The output file is opened for writing here and
      is closed by compile_class, which must be the next method called.

      source_filename must be a single file, not a directory.
      """
      if source_filename.lower().endswith(".jack"):
         destination_filename = source_filename[:-5] + ".vm"
      else:
         destination_filename = source_filename + ".vm"

      # open the destination filename for writing
      self.destination_file = open(destination_filename, 'w')

      try:
         # tokenizer for the input file, the symbol table, and the vm writer
         # that emits into the destination file
         self.tokenizer = JackTokenizer(source_filename)
         self.symbol_table = SymbolTable()
         self.vm_writer = VMWriter(self.destination_file)
      except Exception:
         # construction failed part-way; close the output handle instead of
         # leaking it, then let the original error propagate
         self.destination_file.close()
         raise

   # compiles a complete class and closes the output file
   def compile_class(self):
      # class keyword
      tt, t = self._token_next(True, "KEYWORD", "class")

      # name of class
      tt, t = self._token_next(True, "IDENTIFIER")
      self.class_name = t

      # open brace
      tt, t = self._token_next(True, "SYMBOL", "{")

      # one or more variable declarations
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t in ["field", "static"]:
            self.compile_class_var_dec()
         else:
            # stop trying to process variable declarations
            break

      # one or more subroutine declarations
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t in ["constructor", "function", "method"]:
            self.compile_subroutine()
         else:
            # stop trying to process functions
            break

      # close brace
      # do not advance because we already advanced upon exiting the last loop
      tt, t = self._token_next(False, "SYMBOL", "}")

      # done with compilation; close the output file
      self.destination_file.close()

   # compiles a static declaration or field declaration
   def compile_class_var_dec(self):
      # compile the variable declaration
      # False means this is a class (not a subroutine)
      self.compile_var_dec(False)

   # compiles a complete method, function, or constructor
   def compile_subroutine(self):
      # start of subroutine
      self.symbol_table.start_subroutine()

      # constructor, function, or method keyword
      tt, type = self._token_next(False, "KEYWORD")

      # type of the return value
      # can be either keyword (void) or an identifier (any type)
      tt, t = self._token_next(True)

      # name of the method/function/constructor
      tt, name = self._token_next(True)
      name = self.class_name + "." + name

      # if the type is a method, "define" this as an argument, so the other
      # argument indexes work correctly
      if type == "method":
         self.symbol_table.define("this", self.class_name, SymbolTable.ARG)

      # opening parenthesis
      tt, t = self._token_next(True, "SYMBOL", "(")

      # arguments
      self.tokenizer.advance()
      self.compile_parameter_list()

      # closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")

      # opening brace
      tt, t = self._token_next(True, "SYMBOL", "{")

      # variable declarations
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t == "var":
            self.compile_var_dec()
         else:
            # stop trying to process variable declarations
            break

      # write the function
      num_locals = self.symbol_table.var_count(self.symbol_table.VAR)
      self.vm_writer.write_function(name, num_locals)

      # write any special code at the top of the function
      if type == "constructor":
         # code to allocate memory and set "this"
         size = self.symbol_table.var_count(self.symbol_table.FIELD)
         self.vm_writer.write_push(self.vm_writer.CONST, size)
         self.vm_writer.write_call("Memory.alloc", 1)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
      elif type == "function":
         # nothing special
         pass
      elif type == "method":
         # put argument 0 into pointer 0 (this)
         self.vm_writer.write_push(self.vm_writer.ARG, 0)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
      else:
         print "WARNING: Expected constructor, function, or name; got", type

      # statements
      self.compile_statements()

      # closing brace
      tt, t = self._token_next(False, "SYMBOL", "}")

      self.tokenizer.advance()

   # compiles a (possibly empty) parameter list, not including the enclosing
   # parentheses
   def compile_parameter_list(self):
      # check for empty list
      tt, t = self._token_next(False)
      if tt == "SYMBOL" and t == ")":
         # the parameter list was empty; do not process any more
         pass
      else:
         # there are things in the parameter list
         while True:
            # keyword (variable type)
            tt, type = self._token_next(False)

            # identifier (variable name)
            tt, name = self._token_next(True)

            # the kind is always an arg, since these are all parameters to the
            # function
            kind = SymbolTable.ARG

            # define the variable in the symbol table
            self.symbol_table.define(name, type, kind)

            # possible comma
            tt, t = self._token_next(True)
            if tt != "SYMBOL" or t != ",":
               # not a comma; stop processing parameters
               break

            self.tokenizer.advance()

   # compiles a var declaration
   # if subroutine is true, only the var keyword can be used
   # if subroutine is false, only the static and field keywords can be used
   def compile_var_dec(self, subroutine=True):
      # the keyword to start the declaration
      tt, kind = self._token_next(False, "KEYWORD")

      # check for required types
      if subroutine:
         if kind == "var":
            kind = SymbolTable.VAR
         else:
            print "WARNING: expecting var, but received %s" % (str(kind))
      else:
         if kind == "static":
            kind = SymbolTable.STATIC
         elif kind == "field":
            kind = SymbolTable.FIELD
         else:
            print "WARNING: expecting static or field, but received %s" % (str(kind))

      # type of the declaration
      # could be an identifier or a keyword (int, etc)
      tt, type = self._token_next(True)

      # name of the declaration
      tt, name = self._token_next(True, "IDENTIFIER")

      # define the variable in the symbol table
      self.symbol_table.define(name, type, kind)

      # can support more than one identifier name, to declare more than one
      # variable, separated by commas; process the 2nd-infinite variables
      self.tokenizer.advance()
      while True:
         tt, t = self._token_next(False)
         if tt == "SYMBOL" and t == ",":
            # another variable name follows
            tt, name = self._token_next(True, "IDENTIFIER")

            # define the variable in the symbol table
            self.symbol_table.define(name, type, kind)

            self.tokenizer.advance()
         else:
            # no more variable names
            break

      # should be on the semicolon at the end of the line
      tt, t = self._token_next(False, "SYMBOL", ";")

      self.tokenizer.advance()

   # compiles a sequence of statements, not including the enclosing {}
   def compile_statements(self):
      while True:
         tt, t = self._token_next(False)
         if tt == "KEYWORD" and t in ["do", "let", "while", "return", "if"]:
            # call compile_t, where t is the type of compilation we want
            token = getattr(self, "compile_" + t)()
         else:
            # not a statement; stop processing statements
            break

   # compiles a do statement
   def compile_do(self):
      # do keyword
      tt, t = self._token_next(False, "KEYWORD", "do")

      # subroutine call
      self.tokenizer.advance()
      self.compile_subroutine_call()

      # do statements do not have a return value, so eliminate the return
      # off of the stack
      self.vm_writer.write_pop(self.vm_writer.TEMP, 0)

      # semicolon
      tt, t = self._token_next(False, "SYMBOL", ";")

      self.tokenizer.advance()

   # compiles a let statement
   def compile_let(self):
      # let keyword
      tt, t = self._token_next(False, "KEYWORD", "let")

      # variable name
      tt, name = self._token_next(True, "IDENTIFIER")

      # possible brackets for array
      tt, t = self._token_next(True)
      if tt == "SYMBOL" and t == "[":
         # array - write operation
         array = True

         # compile the offset expression
         self.tokenizer.advance()
         self.compile_expression()

         # write the base address onto the stack
         segment, index = self._resolve_symbol(name)
         self.vm_writer.write_push(segment, index)

         # add base and offset
         self.vm_writer.write_arithmetic("add")

         # we cannot yet put the result into pointer 1, since the read
         # operation (which hasn't been parsed/computed yet) may use pointer 1
         # to read from an arrya value

         # closing bracket
         tt, t = self._token_next(False, "SYMBOL", "]")

         # advance to the next token, since we are expected to be on the = for
         # the next line
         self.tokenizer.advance()
      else:
         array = False

      # equals sign
      tt, t = self._token_next(False, "SYMBOL", "=")

      # expression
      self.tokenizer.advance()
      self.compile_expression()

      if array:
         # our stack now looks like this:
         #    TOP OF STACK
         #    computed result to store
         #    address in which value should be stored
         #    ... previous stuff ...

         # pop the computed value to temp 0
         self.vm_writer.write_pop(self.vm_writer.TEMP, 0)

         # pop the array address to pointer 1 (that)
         self.vm_writer.write_pop(self.vm_writer.POINTER, 1)

         # put the computed value back onto the stack
         self.vm_writer.write_push(self.vm_writer.TEMP, 0)

         # pop to the variable name or the array reference
         self.vm_writer.write_pop(self.vm_writer.THAT, 0)
      else:
         # not an array - pop the expression to the variable
         segment, index = self._resolve_symbol(name)
         self.vm_writer.write_pop(segment, index)

      # semicolon
      tt, t = self._token_next(False, "SYMBOL", ";")

      self.tokenizer.advance()

   # compiles a while statement
   def compile_while(self):
      # labels for this while loop
      self.while_index += 1
      while_start = "WHILE_START_%d" % (self.while_index)
      while_end = "WHILE_END_%d" % (self.while_index)

      # while keyword
      tt, t = self._token_next(False, "KEYWORD", "while")

      # opening parenthesis
      tt, t = self._token_next(True, "SYMBOL", "(")

      # label for the start of the while statement
      self.vm_writer.write_label(while_start)

      # the expression that is the condition of the while statement
      self.tokenizer.advance()
      self.compile_expression()

      # the closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")

      # the result of the evaluation is now on the stack
      # if false, then goto to the end of the loop
      # to do this, negate and then call if-goto
      self.vm_writer.write_arithmetic("not")
      self.vm_writer.write_if(while_end)

      # the opening brace
      tt, t = self._token_next(True, "SYMBOL", "{")

      # the statments that is the body of the while loop
      self.tokenizer.advance()
      self.compile_statements()

      # the closing brace
      tt, t = self._token_next(False, "SYMBOL", "}")

      # after the last statement of the while loop
      # need to jump back up to the top of the loop to evaluate again
      self.vm_writer.write_goto(while_start)

      # label at the end of the loop
      self.vm_writer.write_label(while_end)

      self.tokenizer.advance()

   # compiles a return statement
   def compile_return(self):
      # return keyword
      tt, t = self._token_next(False, "KEYWORD", "return")

      # possible expression to return
      tt, t = self._token_next(True)
      if tt != "SYMBOL" and t != ";":
         self.compile_expression()
      else:
         # no return expression; return 0
         self.vm_writer.write_push(self.vm_writer.CONST, 0)

      # ending semicolon
      tt, t = self._token_next(False, "SYMBOL", ";")

      self.vm_writer.write_return()

      self.tokenizer.advance()

   # compiles a if statement, including a possible trailing else clause
   def compile_if(self):
      # it is more efficient in an if-else case to have the else portion first
      # in the code when testing, but we use the less-efficient but
      # easier-to-write true-false pattern here

      # labels for this if statement
      self.if_index += 1
      if_false = "IF_FALSE_%d" % (self.if_index)
      if_end = "IF_END_%d" % (self.if_index)

      # if keyword
      tt, t = self._token_next(False, "KEYWORD", "if")

      # opening parenthesis
      tt, t = self._token_next(True, "SYMBOL", "(")

      # expression of if statement
      self.tokenizer.advance()
      self.compile_expression()

      # closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")

      # the result of the evaluation is now on the stack
      # if false, then goto the false label
      # if true, fall through to executing code
      # if there is no else, then false and end are the same, but having two
      # labels does not increase code size
      self.vm_writer.write_arithmetic("not")
      self.vm_writer.write_if(if_false)

      # opening brace
      tt, t = self._token_next(True, "SYMBOL", "{")

      # statements for true portion
      self.tokenizer.advance()
      self.compile_statements()

      # closing brace
      tt, t = self._token_next(False, "SYMBOL", "}")

      tt, t = self._token_next(True)
      if tt == "KEYWORD" and t == "else":
         # else statement exists

         # goto the end of the if statement at the end of the true portion
         self.vm_writer.write_goto(if_end)

         # label for the start of the false portion
         self.vm_writer.write_label(if_false)

         # opening brace
         tt, t = self._token_next(True, "SYMBOL", "{")

         # statements
         self.tokenizer.advance()
         self.compile_statements()

         # closing brace
         tt, t = self._token_next(False, "SYMBOL", "}")

         # end label
         self.vm_writer.write_label(if_end)

         # advance tokenizer only if we are in the else, since otherwise the
         # token was advanced by the else check
         self.tokenizer.advance()
      else:
         # no else portion; only put in a label for false, since end is not
         # used
         self.vm_writer.write_label(if_false)

   # compiles an expression (one or more terms connected by operators)
   def compile_expression(self):
      # the first term
      self.compile_term()

      # finish any number of operators followed by terms
      while True:
         tt, t = self._token_next(False)
         if tt == "SYMBOL" and t in "+-*/&|<>=":
            # found an operator
            # postfix order - add the next term and then do the operator

            # the next term
            self.tokenizer.advance()
            self.compile_term()

            # the operator
            if t == "+":
               self.vm_writer.write_arithmetic("add")
            if t == "-":
               self.vm_writer.write_arithmetic("sub")
            if t == "=":
               self.vm_writer.write_arithmetic("eq")
            if t == ">":
               self.vm_writer.write_arithmetic("gt")
            if t == "<":
               self.vm_writer.write_arithmetic("lt")
            if t == "&":
               self.vm_writer.write_arithmetic("and")
            if t == "|":
               self.vm_writer.write_arithmetic("or")
            if t == "*":
               self.vm_writer.write_call("Math.multiply", 2)
            if t == "/":
               self.vm_writer.write_call("Math.divide", 2)
         else:
            # no term found; done parsing the expression
            break

   # compiles a term
   # this routine is faced with a slight difficulty when trying to decide
   # between some of the alternative parsing rules. specifically, if the
   # current token is an identifier, the routine must distinguish between a
   # variable, an array entry, and a subroutine call. a single lookahead token,
   # which may be one of [, (, or ., suffices to distinguish between the three
   # possibilities. any other token is not part of this term and should not
   # be advanced over.
   def compile_term(self):
      # a term: integer_constant | string_constant | keyword_constant |
      # varname | varname[expression] | subroutine_call | (expression) |
      # unary_op term
      tt, t = self._token_next(False)
      if tt == "INT_CONST":
         self.vm_writer.write_push(self.vm_writer.CONST, t)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "STRING_CONST":
         # after this portion is run, a pointer to a string should be on the
         # stack
         # we create a new string of a certain size and then append characters
         # one by one; each append operation returns the pointer to the same
         # string

         # create the string
         # string is a len, data tuple; not null-terminated
         size = len(t)
         self.vm_writer.write_push(self.vm_writer.CONST, size)
         self.vm_writer.write_call("String.new", 1)

         # append each character
         for char in t:
            self.vm_writer.write_push(self.vm_writer.CONST, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "KEYWORD":
         if t == "true":
            # true is -1, which is 0 negated
            self.vm_writer.write_push(self.vm_writer.CONST, 0)
            self.vm_writer.write_arithmetic("not")
         elif t == "false" or t == "null":
            self.vm_writer.write_push(self.vm_writer.CONST, 0)
         elif t == "this":
            self.vm_writer.write_push(self.vm_writer.POINTER, 0)

         # advance for the next statement
         self.tokenizer.advance()
      elif tt == "SYMBOL" and t == "(":
         # ( expression )

         # parse the expression
         self.tokenizer.advance()
         self.compile_expression()

         # closing parenthesis
         tt, t = self._token_next(False, "SYMBOL", ")")

         # advance for the next statement
         self.tokenizer.advance()

      elif tt == "SYMBOL" and t in "-~":
         # unary_op term
         # postfix order - add the next term and then do the operator

         # parse the rest of the term
         self.tokenizer.advance()
         self.compile_term()

         # write the unary operation
         if t == "-":
            self.vm_writer.write_arithmetic("neg")
         elif t == "~":
            self.vm_writer.write_arithmetic("not")

      elif tt == "IDENTIFIER":
         # varname, varname[expression], subroutine_call

         # do not write the identifer yet

         # get the next bit of the expression
         # if it is a [, then array; if it is a ( or ., then subroutine call
         # if none of above, then pass over
         tt2, t2 = self._token_next(True)

         if tt2 == "SYMBOL" and t2 in "(.":
            # subroutine call
            # back up and then compile the subroutine call
            self.tokenizer.retreat()

            self.compile_subroutine_call()
         elif tt2 == "SYMBOL" and t2 == "[":
            # array - read operation

            # write the base address onto the stack
            segment, index = self._resolve_symbol(t)
            self.vm_writer.write_push(segment, index)

            # compile the offset expression
            self.tokenizer.advance()
            self.compile_expression()

            # add base and offset
            self.vm_writer.write_arithmetic("add")

            # put the resulting address into pointer 1 (that)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 1)

            # read from that 0 onto the stack
            self.vm_writer.write_push(self.vm_writer.THAT, 0)

            # closing bracket
            tt, t = self._token_next(False, "SYMBOL", "]")

            # advance for the next statement
            self.tokenizer.advance()
         else:
            # none of above - just a single identifier
            segment, index = self._resolve_symbol(t)
            self.vm_writer.write_push(segment, index)

      else:
         # unknown
         print "WARNING: Unknown term expression object:", tt, t

   # compiles a (possible empty) comma-separated list of expressions
   def compile_expression_list(self):
      num_args = 0

      # check for empty list
      tt, t = self._token_next(False)
      if tt == "SYMBOL" and t == ")":
         # the parameter list was empty; do not process any more
         pass
      else:
         # there are things in the parameter list
         while True:
            # expression to pass
            self.compile_expression()
            num_args += 1

            # possible comma
            tt, t = self._token_next(False)
            if tt == "SYMBOL" and t == ",":
               self.tokenizer.advance()
            else:
               # not a comma; stop processing parameters
               break

      return num_args

   # compiles a subroutine call
   # two cases:
   # - subroutineName(expressionList)
   # - (class|var).subroutineName(expressionList)
   def compile_subroutine_call(self):
      # first part of name
      tt, name1 = self._token_next(False, "IDENTIFIER")

      # a dot and another name may exist, or it could be a parenthesis
      name2 = None
      tt, t = self._token_next(True)
      if tt == "SYMBOL" and t == ".":
         # the name after the dot
         tt, name2 = self._token_next(True, "IDENTIFIER")

         # advance so that we are on the parenthesis
         self.tokenizer.advance()

      # determine if this is a method call
      # three possibilities
      # - class.func() - function call
      # - var.func()   - method call
      # - func()       - method call on current object
      if self.symbol_table.contains(name1):
         method_call = True
         local_call = False
      elif name2 == None:
         method_call = True
         local_call = True
      else:
         method_call = False

      # if a method call, push variable name1
      # this a method call if the symbol table contains name1 and name2 exists
      # OR name1 is a method in the current object
      if method_call and local_call:
         # push the current object onto the stack as a hidden argument
         self.vm_writer.write_push(self.vm_writer.POINTER, 0)
      elif method_call and not local_call:
         # push the variable onto the stack as a hidden argument
         segment, index = self._resolve_symbol(name1)
         self.vm_writer.write_push(segment, index)

      # opening parenthesis
      tt, t = self._token_next(False, "SYMBOL", "(")

      # expression list
      self.tokenizer.advance()
      num_args = self.compile_expression_list()

      # closing parenthesis
      tt, t = self._token_next(False, "SYMBOL", ")")

      # write the call
      if method_call and local_call:
         # methd + <blank>

         # get the name of the vm function to call
         classname = self.class_name
         vm_function_name = classname + "." + name1

         # increase arguments by 1, since there is the hidden "this"
         num_args += 1

         # make the call
         self.vm_writer.write_call(vm_function_name, num_args)

      elif method_call and not local_call:
         # variable name + method

         # get the name of the vm function to call
         classname = self.symbol_table.get(name1)[1]
         vm_function_name = classname + "." + name2

         # increase arguments by 1, since there is the hidden "this"
         num_args += 1

         # make the call
         self.vm_writer.write_call(vm_function_name, num_args)
      else:
         # get the name of the vm function to call
         vm_function_name = name1 + "." + name2

         # make the call
         self.vm_writer.write_call(vm_function_name, num_args)

      self.tokenizer.advance()

   # returns the token_type and token of the next token after advancing the
   # tokenizer before reading if advance is True
   def _token_next(self, advance=False, expected_type=None, expected_value=None):
      # advance the tokenizer, if requested
      if advance:
         self.tokenizer.advance()

      # get the token type and the token itself
      token_type = self.tokenizer.token_type()
      token = str(getattr(self.tokenizer, token_type.lower())())

      if expected_type and token_type != expected_type:
         print "WARNING: Type", token_type, "found; expected", expected_type
         import traceback, sys
         traceback.print_stack()
         sys.exit(1)
      if expected_value and token != expected_value:
         print "WARNING: Value", token, "found; expected", expected_value
         import traceback, sys
         traceback.print_stack()
         sys.exit(1)

      return token_type, token

   # converts a symbol table kind into a vm segment
   def _type_to_segment(self, type):
      if type == self.symbol_table.STATIC:
         return self.vm_writer.STATIC
      elif type == self.symbol_table.FIELD:
         return self.vm_writer.THIS
      elif type == self.symbol_table.ARG:
         return self.vm_writer.ARG
      elif type == self.symbol_table.VAR:
         return self.vm_writer.LOCAL
      else:
         print "ERROR: Bad type %s" % (str(type))
 
   # resolves the symbol from the symbol table
   # the segment and index is returned as a 2-tuple
   def _resolve_symbol(self, name):
      kind, type, index = self.symbol_table.get(name)
      return self._type_to_segment(kind), index
Ejemplo n.º 2
0
class CompilationEngine():
    '''
    Parses a stream of jack tokens recursively.
    '''

    def __init__(self, tokenizer):
        '''
        Compile the class supplied by tokenizer into a .vm file.

        The output name is the tokenizer's filename with a trailing '.jack'
        removed and '.vm' appended. Construction runs the whole compilation:
        the first token is loaded, compile_class() is called, and the writer
        is closed afterwards, even when compilation raises.
        '''
        source_name = tokenizer.get_filename()
        # strip only a trailing '.jack'; str.replace would also remove any
        # '.jack' occurring earlier in the path (e.g. in a directory name)
        if source_name.endswith('.jack'):
            source_name = source_name[:-len('.jack')]
        self._name = source_name
        # tokenizer for input
        self._tokenizer = tokenizer
        # symbol table
        self._symbols = SymbolTable()
        # vm output file
        self._writer = VMWriter(self._name + '.vm')
        self._class = None
        self._subroutine = None
        self._counter = 0
        try:
            # Input should be a tokenized .jack file containing one class
            assert self._tokenizer.has_more_tokens()
            self._tokenizer.advance()
            self.compile_class()
        finally:
            # always release the output file, also on a failed compilation
            self.close()


    def change_name(self, name):
        self._name = name

    def get_name(self, name):
        return self._name

    def get_token(self):
        return self._tokenizer._token

    def get_type(self):
        return self._tokenizer._type

    def close(self):
        # close the output file at the end
        self._writer.close()

    def compile_class(self):
        # 'class' className '{' classVarDec* subroutineDec* '}'
        # keyword - class
        assert self._tokenizer.keyword() == 'class'
        self._tokenizer.advance()
        # identifier - className
        assert self._tokenizer.identifier()
        self._class = self._tokenizer.identifier()
        self._tokenizer.advance()
        # sybmol - '{'
        assert self._tokenizer.symbol() == '{', "expected '{' but got " + self.get_token()
        self._tokenizer.advance()
        # classVarDec*
        while self._tokenizer.is_valid_class_variable():
            self.compile_class_var()
        # subroutineBody*
        while self._tokenizer.is_valid_subroutine():
            self.compile_subroutine()
        # sybmol - '}'
        assert self._tokenizer.symbol() == '}', "expected '}' but got " + self.get_token()
        self._tokenizer.advance()
        # assuming .jack file is properly formatted, there should be no more tokens
        assert not self._tokenizer.has_more_tokens()


    def compile_class_var(self):
        # ('static'|'field') type varName (',' varName)* ';'
        assert self._tokenizer.is_valid_class_variable()
        # keyword - 'static' or 'field'
        temp_kind = self._tokenizer.get_token()
        self._tokenizer.advance()
        # type - 'int' or 'char' or 'boolean' or className
        assert self._tokenizer.is_valid_type()
        temp_type = self._tokenizer.get_token()
        self._tokenizer.advance()
        # identifier - varName
        assert self._tokenizer.identifier()
        temp_name = self._tokenizer.get_token()
        self._symbols.define(temp_name, temp_type, temp_kind)
        self._tokenizer.advance()
        # recursively check for (',' varName)*  structure
        while self._tokenizer.symbol() == ',':
            self._tokenizer.advance()
            # identifier - varName
            assert self._tokenizer.identifier()
            temp_name = self._tokenizer.get_token()
            self._symbols.define(temp_name, temp_type, temp_kind)
            # symbol - ',' or ';'
            self._tokenizer.advance()
        # next token should be a ';'
        assert self._tokenizer.symbol() == ';', "expected ';' but got " + self.get_token()
        self._tokenizer.advance()


    def compile_subroutine(self):
        # ('constructor'|'method'|'function') ('void'| type) subroutineName '(' parameterList ')' subroutineBody
        assert self._tokenizer.is_valid_subroutine()
        self._symbols.start_subroutine()
        # keyword - constructor or method or function
        self._subroutine = self._tokenizer.get_token()
        if self._subroutine == 'method':
            # in the case of method, add 'this' to symbol table
            self._symbols.define('this', self._class, 'argument')
        self._tokenizer.advance()
        # keyword - type or void
        assert self._tokenizer.is_valid_subroutine_type()
        self._tokenizer.advance()
        # identifier - subroutineName
        assert self._tokenizer.identifier()
        temp_name = self._tokenizer.identifier()
        self._tokenizer.advance()
        # symbol - '('
        assert self._tokenizer.symbol() == '('
        self._tokenizer.advance()
        # parameterList
        if self._tokenizer.is_valid_type():
            self.compile_parameter_list()
        # symbol - '('
        assert self._tokenizer.symbol() == ')'
        self._tokenizer.advance()
        temp_name = self._class + '.' + temp_name
        # symbol - '{'
        assert self._tokenizer.symbol() == '{'
        # subroutineBody
        self.compile_subroutine_body(temp_name)
        self._writer.write_comment('end subroutine ' + temp_name)


    def compile_parameter_list(self):
        # ( (type varName) (',' type varName)* )?
        # only called if non-empty parameter list
        assert self._tokenizer.is_valid_type()
        # type - int or char or boolean or className
        temp_type = self._tokenizer.get_token()
        self._tokenizer.advance()
        # identifier - varName
        assert self._tokenizer.identifier()
        temp_name = self._tokenizer.get_token()
        self._symbols.define(temp_name, temp_type, 'argument')
        self._tokenizer.advance()
        while self._tokenizer.symbol() == ',':
            # symbol - ','
            self._tokenizer.advance()
            assert self._tokenizer.is_valid_type()
            # type - int or char or boolean or className
            temp_type = self._tokenizer.get_token()
            self._tokenizer.advance()
            # identifier - varName
            assert self._tokenizer.identifier()
            temp_name = self._tokenizer.get_token()
            self._symbols.define(temp_name, temp_type, 'argument')
            self._tokenizer.advance()
        # symbol - ')'
        assert self._tokenizer.symbol() == ')'


    def compile_subroutine_body(self, name):
        # '{' varDec* statements '}'
        # symbol - '{'
        assert self._tokenizer.symbol() == '{'
        self._tokenizer.advance()
        # varDec
        num_locals = 0
        while self._tokenizer.keyword() == 'var':
            # remember that compiling variables writes NO vm code
            num_locals += self.compile_var()
        self._writer.write_function(name, num_locals)
        if self._subroutine == 'method':
            # set this, in the case of a method
            self._writer.write_push('argument',0)
            self._writer.write_pop('pointer',0)
        elif self._subroutine == 'constructor':
            # allocate object
            self._writer.write_object_alloc(self._symbols.var_count('field'))
        # statements
        self.compile_statements()
        # symbol - '{'
        assert self._tokenizer.symbol() == '}'
        self._tokenizer.advance()


    def compile_var(self):
        # 'var' type varName (',' varName)* ';'
        assert self._tokenizer.is_valid_variable()
        # keyword - 'var'
        self._tokenizer.advance()
        # type - int or char or boolean or className
        assert self._tokenizer.is_valid_type()
        temp_type = self._tokenizer.get_token()
        self._tokenizer.advance()
        # identifier - varName
        assert self._tokenizer.identifier()
        temp_name = self._tokenizer.get_token()
        self._symbols.define(temp_name, temp_type, 'local')
        num_locals = 1
        self._tokenizer.advance()
        while self._tokenizer.symbol() == ',':
            # symbol - ','
            self._tokenizer.advance()
            # identifier - varName
            assert self._tokenizer.identifier()
            temp_name = self._tokenizer.get_token()
            self._symbols.define(temp_name, temp_type, 'local')
            num_locals += 1
            self._tokenizer.advance()
        # symbol - ';'
        assert self._tokenizer.symbol() == ';'
        self._tokenizer.advance()
        return num_locals


    def compile_statements(self):
        # statement*
        while self._tokenizer.is_valid_statement():
            if self._tokenizer.keyword() == 'let':
                # letStatement
                self.compile_let()
            elif self._tokenizer.keyword() == 'if':
                # ifStatement
                self.compile_if()
            elif self._tokenizer.keyword() == 'while':
                # whileStatement
                self.compile_while()
            elif self._tokenizer.keyword() == 'do':
                # doStatement
                self.compile_do()
            elif self._tokenizer.keyword() == 'return':
                # returnStatement
                self.compile_return()
        # symbol - '}'
        assert self._tokenizer.symbol() == '}'

    def compile_let(self):
        # 'let' varName ('[' expression ']')? '=' expression ';'
        # keyword - 'let'
        assert self._tokenizer.keyword() == 'let'
        self._tokenizer.advance()
        # identifier - varName
        assert self._tokenizer.identifier()
        if self._tokenizer.peek() == '=':
            # varName '=' expression ';'
            var_kind = self._symbols.kind_of(self._tokenizer.identifier())
            var_index = self._symbols.index_of(self._tokenizer.identifier())
            self._tokenizer.advance()
            # next token is '='
            self._tokenizer.advance()
            # evaluate RHS expression, pop into variable
            self.compile_expression()
            if var_kind == 'field':
                self._writer.write_pop('this', var_index)
            else:
                self._writer.write_pop(var_kind, var_index)
            # expression ends with a ';'
            assert self._tokenizer.symbol() == ';', "expected ';' but got " + self.get_token()
            self._tokenizer.advance()
        elif self._tokenizer.peek() == '[':
            # varName '[' expression ']' '=' expression ';'
            # write base address to stack
            self._writer.write_push(self._symbols.kind_of(self._tokenizer.identifier()),
                                    self._symbols.index_of(self._tokenizer.identifier()))
            self._tokenizer.advance()
            # symbol - '['
            self._tokenizer.advance()
            # expression - represents array index
            self.compile_expression()
            # base address + array index
            self._writer.write_arithmetic('add')
            # symbol - '['
            assert self._tokenizer.symbol() == ']'
            self._tokenizer.advance()
            # symbol - '='
            assert self._tokenizer.symbol() == '='
            self._tokenizer.advance()
            # expression
            self.compile_expression()
            # pop RHS value into temp segment
            self._writer.write_pop('temp', 1)
            # align that with array[i]
            self._writer.write_pop('pointer', 1)
            # push value of RHS expression onto stack
            self._writer.write_push('temp', 1)
            # pop value into correct array index
            self._writer.write_pop('that', 0)
            # symbol - ';'
            assert self._tokenizer.symbol() == ';', "expected ';' but got " + self.get_token()
            self._tokenizer.advance()


    def compile_if(self):
        # 'if' '(' expression ')' ('else' '{' statements '}')?
        # keyword - if
        assert self._tokenizer.keyword() == 'if'
        self._writer.write_comment('if statement')
        self._tokenizer.advance()
        # symbol - (
        assert self._tokenizer.symbol() == '(', "expected '(' but got " + self.get_token()
        self._tokenizer.advance()
        # expression
        self.compile_expression()
        self._writer.write_arithmetic('not')
        label_num = str(self._counter)
        self._counter += 1
        self._writer.write_if('ELSE'+label_num)
        # symbol - )
        assert self._tokenizer.symbol() == ')', "expected '(' but got " + self.get_token()
        self._tokenizer.advance()
        # symbol - '{'
        assert self._tokenizer.symbol() == '{'
        self._tokenizer.advance()
        # statements
        self.compile_statements()
        # symbol - '}'
        assert self._tokenizer.symbol() == '}'
        self._tokenizer.advance()
        self._writer.write_goto('IF'+label_num)
        self._writer.write_label('ELSE'+label_num)
        # check for else
        if self._tokenizer.keyword() == 'else':
            # 'else' '{' statements '}'
            # keyword - 'else'
            self._tokenizer.advance()
            # symbol - '{'
            assert self._tokenizer.symbol() == '{', "expected '{' but got " + self.get_token()
            self._tokenizer.advance()
            # statements
            self.compile_statements()
            # symbol - '}'
            assert self._tokenizer.symbol() == '}', "expected '}' but got " + self.get_token()
            self._tokenizer.advance()
        self._writer.write_label('IF'+label_num)


    def compile_while(self):
        # 'while' '(' expression ')' '{' statements '}'
        # keyword - 'while'
        assert self._tokenizer.keyword() == 'while'
        # labels for ifgoto and goto vm commands
        label_num = str(self._counter)
        self._counter += 1
        self._tokenizer.advance()
        # symbol - '('
        assert self._tokenizer.symbol() == '('
        self._tokenizer.advance()
        self._writer.write_label('WHILE'+label_num)
        # expression
        self.compile_expression()
        self._writer.write_arithmetic('not')
        self._writer.write_if('ELSE'+label_num)
        # symbol - ')'
        assert self._tokenizer.symbol() == ')'
        self._tokenizer.advance()
        # symbol - '{'
        assert self._tokenizer.symbol() == '{'
        self._tokenizer.advance()
        # statements
        self.compile_statements()
        self._writer.write_goto('WHILE'+label_num)
        self._writer.write_label('ELSE'+label_num)
        # symbol - '}'
        assert self._tokenizer.symbol() == '}'
        self._tokenizer.advance()

    def compile_do(self):
        # 'do' subroutineCall ';'
        assert self._tokenizer.keyword() == 'do'
        # keyword - 'do'
        self._tokenizer.advance()
        # identifier - subroutineCall
        assert self._tokenizer.identifier()
        # outer subroutine must be void function
        self.compile_subroutine_call()
        # symbol - ';'
        assert self._tokenizer.symbol() == ';'
        # discard void function default return value
        self._writer.write_pop('temp',0)
        self._tokenizer.advance()



    def compile_return(self):
        # 'return' expression? ';'
        # keyword - 'return'
        assert self._tokenizer.keyword() == 'return'
        self._writer.write_comment('return statement')
        self._tokenizer.advance()
        # expression?
        if self._tokenizer.symbol() == ';':
            # symbol - ';' (void function)
            self._writer.write_push('constant', 0)
            self._tokenizer.advance()
        else:
            # expression (not void)
            self.compile_expression()
            # symbol - ';'
            assert self._tokenizer.symbol() == ';'
            self._tokenizer.advance()
        self._writer.write_return()


    def compile_expression(self):
        # term (op term)*
        # term
        self.compile_term()
        # check for op
        while self._tokenizer.is_valid_operator():
            # op
            temp_op = self._tokenizer.symbol()
            self._tokenizer.advance()
            # term
            self.compile_term()
            # write operator vm command, postfix order
            self._writer.write_operator(temp_op)


    def compile_term(self):
        # integerConstant | stringConstant | keywordConstant | varName |
        # varName '[' expression']' | subroutineCall | '(' expression ')' | unaryOp term
        if self._tokenizer.int_value() is not None:
            # integerConstant
            self._writer.write_push('constant', self._tokenizer.int_value())
            self._tokenizer.advance()
        elif self._tokenizer.string_value() is not None:
            # stringConstant
            self._writer.write_string_constant(self._tokenizer.string_value())
            self._tokenizer.advance()
        elif self._tokenizer.keyword() is not None:
            # keywordConstant
            self._writer.write_keyword_constant(self._tokenizer.keyword())
            self._tokenizer.advance()
        elif self._tokenizer.symbol() == '(':
            # '(' expression ')'
            self._tokenizer.advance()
            self.compile_expression()
            assert self._tokenizer.symbol() == ')'
            self._tokenizer.advance()
        elif self._tokenizer.is_valid_unary():
            # unaryOp term
            temp_op = self._tokenizer.symbol()
            self._tokenizer.advance()
            # term
            self.compile_term()
            # write operator vm command, postfix order
            self._writer.write_unary(temp_op)
        elif self._tokenizer.identifier() and self._tokenizer.peek() == '[':
            # varName '[' expression']'
            # process array name, push associated value onto stack
            self._writer.write_push(self._symbols.kind_of(self._tokenizer.identifier()),
                                    self._symbols.index_of(self._tokenizer.identifier()))
            self._tokenizer.advance()
            # process [ symbol
            self._tokenizer.advance()
            # expects expression, value is pushed onto the stack
            self.compile_expression()
            # setup pointer to array element
            self._writer.write_operator('+')
            self._writer.write_pop('pointer', 1)
            # push array value onto stack
            self._writer.write_push('that', 0)
            # expects closing square bracket
            assert self._tokenizer.symbol() == ']'
            self._tokenizer.advance()
        elif self._tokenizer.identifier() and self._tokenizer.peek() in ['(','.']:
            # subroutineCall
            self.compile_subroutine_call()
        elif self._symbols.exists(self._tokenizer.identifier()):
            # varName
            var_name = self._tokenizer.identifier()
            var_kind = self._symbols.kind_of(var_name)
            var_index = self._symbols.index_of(var_name)
            if var_kind == 'field':
                # push field var onto stack
                self._writer.write_push('this', var_index)
            else:
                self._writer.write_push(var_kind, var_index)
            self._tokenizer.advance()
        else:
            assert False, "unknown token: " + self.get_token() + " with type " + self.get_type()

    def compile_subroutine_call(self):
        # subroutineName '(' expressionList ')'| (className | varName) '.' subroutineName '(' expressionList ')'
        assert self._tokenizer.identifier() and self._tokenizer.peek() in ['(','.']
        if self._tokenizer.identifier() and self._tokenizer.peek() == '(':
            # subroutineName '(' expressionList ')'
            # method (in current class)
            temp_name = self._class + '.' + self._tokenizer.identifier()
            self._tokenizer.advance()
            # symbol - '('
            self._tokenizer.advance()
            # push this onto the stack
            self._writer.write_push('pointer',0)
            temp_nargs = 1
            # expressionList
            temp_nargs += self.compile_expression_list()
            self._writer.write_call(temp_name, temp_nargs)
            # symbol - ')'
            assert  self._tokenizer.symbol() == ')'
            self._tokenizer.advance()
        elif self._symbols.exists(self._tokenizer.identifier()) and self._tokenizer.peek() == '.':
            # varName '.' subroutineName '(' expressionList ')'
            # varName (object)
            temp_name = self._tokenizer.identifier()
            # push object address onto stack, this is an implicit argument
            if self._symbols.kind_of(temp_name) == 'field':
                self._writer.write_push('this',
                                        self._symbols.index_of(temp_name))
            else: 
                self._writer.write_push(self._symbols.kind_of(temp_name),
                                        self._symbols.index_of(temp_name))
            # change name to class name
            temp_name = self._symbols.type_of(temp_name)
            temp_nargs = 1
            self._tokenizer.advance()
            # symbol - '.'
            temp_name += self._tokenizer.get_token()
            self._tokenizer.advance()
            # subroutineName
            assert self._tokenizer.identifier()
            temp_name += self._tokenizer.identifier()
            self._tokenizer.advance()
            # symbol - '('
            assert self._tokenizer.symbol() == '('
            self._tokenizer.advance()
            # expressionList
            temp_nargs += self.compile_expression_list()
            self._writer.write_call(temp_name, temp_nargs)
            # symbol - '('
            assert  self._tokenizer.symbol() == ')'
            self._tokenizer.advance()
        elif self._tokenizer.identifier() and self._tokenizer.peek() == '.':
            # className . subroutineName '(' expressionList ')'
            # className
            temp_name = self._tokenizer.identifier()
            self._tokenizer.advance()
            # symbol - '.'
            temp_name += self._tokenizer.get_token()
            self._tokenizer.advance()
            # subroutineName
            assert self._tokenizer.identifier(), print(self._tokenizer._tokens)
            temp_name += self._tokenizer.identifier()
            self._tokenizer.advance()
            # symbol - '('
            assert self._tokenizer.symbol() == '('
            self._tokenizer.advance()
            # expressionList
            temp_nargs = self.compile_expression_list()
            self._writer.write_call(temp_name, temp_nargs)
            # symbol - ')'
            assert  self._tokenizer.symbol() == ')'
            self._tokenizer.advance()

    def compile_expression_list(self):
        # (expression ( ',' expression)* )?
        temp_nargs = 0
        while self._tokenizer.symbol() != ')':
            self.compile_expression()
            temp_nargs += 1
            if self._tokenizer.symbol() == ',':
                # there is another expression in the list
                self._tokenizer.advance()
        return temp_nargs