class CompilationEngine:
    """
    Generates the compiler's output: an XML parse tree of one Jack class.

    The engine is a recursive-descent parser. Every token handed out by the
    tokenizer is a string whose first whitespace-separated field is the token
    type tag and whose second field is the token value. Accepted tokens are
    copied verbatim into the output, indented by the current nesting depth.
    """

    def __init__(self, input_file, output_file):
        """
        the constructor of the class; it parses the whole input and writes
        the resulting XML lines to the output file
        :param input_file: the jack file that the user want to compile
        :param output_file: the path for the output xml file
        """
        self.file_reader = JackFileReader(input_file)
        self.jack_tokens = JackTokenizer(self.file_reader.get_one_liner())
        self.curr_token = self.jack_tokens.advance()
        self.to_output_file = []
        self.depth = 0
        self.compile_class()
        self.export_file(output_file)

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _open_tag(self, name):
        """
        Appends the opening tag <name> at the current depth and goes one
        nesting level deeper.
        :param name: the tag name, without the angle brackets
        """
        self.to_output_file.append(
            INDENTATION * self.depth + "<" + name + ">")
        self.depth += 1

    def _close_tag(self, name):
        """
        Leaves one nesting level and appends the closing tag </name>.
        :param name: the tag name, without the angle brackets
        """
        self.depth -= 1
        self.to_output_file.append(
            INDENTATION * self.depth + "</" + name + ">")

    def _token_parts(self):
        """
        Splits the current token on whitespace.
        :return: a list with at least two items: the type tag and the value
        :raises ValueError: when the input ended or the token is malformed
        """
        if not self.curr_token:
            raise ValueError("unexpected end of input")
        parts = self.curr_token.split()
        if len(parts) < 2:
            raise ValueError(
                "malformed token: {!r}".format(self.curr_token))
        return parts

    def _token_value(self):
        """
        :return: the value (second field) of the current token
        """
        return self._token_parts()[1]

    def _emit_and_advance(self):
        """
        Copies the current token to the output and moves to the next token.
        """
        self.to_output_file.append(INDENTATION * self.depth + self.curr_token)
        self.curr_token = self.jack_tokens.advance()

    def _eat_current(self):
        """
        Accepts the current token whatever it is (used where the grammar
        allows several alternatives, e.g. a type or an operator).
        :raises ValueError: when there is no current token
        """
        self._token_parts()
        self._emit_and_advance()

    def _compile_name_list(self):
        """
        Compiles: varName (',' varName)*
        """
        self.__eat_by_type(IDENTIFIER)
        while self._token_value() == COMMA:
            self.__eat(COMMA)
            self.__eat_by_type(IDENTIFIER)

    def _compile_typed_name(self):
        """
        Compiles: type varName
        """
        self._eat_current()
        self.__eat_by_type(IDENTIFIER)

    def _compile_subroutine_call(self):
        """
        Compiles: subroutineName '(' expressionList ')'
        or: (className | varName) '.' subroutineName '(' expressionList ')'
        """
        self.__eat_by_type(IDENTIFIER)
        if self._token_value() == ".":
            self.__eat(".")
            self.__eat_by_type(IDENTIFIER)
        self.__eat(LEFT_BRACKETS)
        self.compile_expression_list()
        self.__eat(RIGHT_BRACKETS)

    # ------------------------------------------------------------------
    # program structure
    # ------------------------------------------------------------------
    def compile_class(self):
        """
        Compiles a complete class.
        """
        # 'class' className '{' classVarDec* subroutineDec* '}'
        self._open_tag("class")
        self.__eat('class')
        self.__eat_by_type(IDENTIFIER)
        self.__eat(LEFT_CURLY_BRACKETS)
        while self._token_value() in VAR_DECS:
            self.compile_class_var_dec()
        while self._token_value() in SUB_ROUTINES:
            self.compile_subroutine_dec()
        self.__eat(RIGHT_CURLY_BRACKETS)
        self._close_tag("class")

    def compile_class_var_dec(self):
        """
        Compiles a static declaration or a field declaration.
        :return:
        """
        # ('static' | 'field' ) type varName (',' varName)* ';'
        self._open_tag("classVarDec")
        # 'static' or 'field'; the caller already checked it is in VAR_DECS
        self._eat_current()
        # the type is taken as is
        self._eat_current()
        self._compile_name_list()
        self.__eat(SEMI_COLON)
        self._close_tag("classVarDec")

    def compile_subroutine_dec(self):
        """
        Compiles a complete method, function, or constructor.
        :return:
        """
        # ('constructor' | 'function' | 'method') ('void' | type)
        # subroutineName '(' parameterList ')' subroutineBody
        self._open_tag("subroutineDec")
        # subroutine kind; the caller already checked it is in SUB_ROUTINES
        self._eat_current()
        # the return type (or void) is taken as is
        self._eat_current()
        # subroutine name
        self.__eat_by_type(IDENTIFIER)
        self.__eat(LEFT_BRACKETS)
        self.compile_parameters_list()
        self.__eat(RIGHT_BRACKETS)
        self.compile_subroutine_body()
        self._close_tag("subroutineDec")

    def compile_parameters_list(self):
        """
        Compiles a (possibly empty) parameter list, not including the
        enclosing "()".
        :return:
        """
        # ( (type varName) (',' type varName)*)?
        self._open_tag("parameterList")
        if self._token_value() != RIGHT_BRACKETS:
            self._compile_typed_name()
            while self._token_value() == COMMA:
                self.__eat(COMMA)
                self._compile_typed_name()
        self._close_tag("parameterList")

    def compile_subroutine_body(self):
        """
        compiles the subroutine body
        :return:
        """
        # '{' varDec* statements '}'
        self._open_tag("subroutineBody")
        self.__eat(LEFT_CURLY_BRACKETS)
        while self._token_value() == "var":
            self.compile_var_dec()
        self.compile_statements()
        self.__eat(RIGHT_CURLY_BRACKETS)
        self._close_tag("subroutineBody")

    def compile_var_dec(self):
        """
        Compiles a var declaration.
        :return:
        """
        # 'var' type varName (',' varName)* ';'
        self._open_tag("varDec")
        self.__eat("var")
        # the type is taken as is
        self._eat_current()
        self._compile_name_list()
        self.__eat(SEMI_COLON)
        self._close_tag("varDec")

    # ------------------------------------------------------------------
    # statements
    # ------------------------------------------------------------------
    def compile_statements(self):
        """
        Compiles a sequence of statements, not including the enclosing "{}".
        Stops at the first token that does not start a statement.
        :return:
        """
        handlers = {
            "let": self.compile_let,
            "if": self.compile_if,
            "while": self.compile_while,
            "do": self.compile_do,
            "return": self.compile_return,
        }
        self._open_tag("statements")
        while True:
            handler = handlers.get(self._token_value())
            if handler is None:
                break
            handler()
        self._close_tag("statements")

    def compile_let(self):
        """
        Compiles a let statement.
        :return:
        """
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self._open_tag("letStatement")
        self.__eat("let")
        self.__eat_by_type(IDENTIFIER)
        if self._token_value() == LEFT_SQUARE_BRACKETS:
            self.__eat(LEFT_SQUARE_BRACKETS)
            self.compile_expression()
            self.__eat(RIGHT_SQUARE_BRACKETS)
        self.__eat(EQUAL_SIGN)
        self.compile_expression()
        self.__eat(SEMI_COLON)
        self._close_tag("letStatement")

    def compile_if(self):
        """
        Compiles a if statement, including the optional else clause.
        :return:
        """
        # 'if' '(' expression ')' '{' statements '}'
        # ('else' '{' statements '}')?
        self._open_tag("ifStatement")
        self.__eat("if")
        self.__eat(LEFT_BRACKETS)
        self.compile_expression()
        self.__eat(RIGHT_BRACKETS)
        self.__eat(LEFT_CURLY_BRACKETS)
        self.compile_statements()
        self.__eat(RIGHT_CURLY_BRACKETS)
        if self._token_value() == "else":
            self.__eat("else")
            self.__eat(LEFT_CURLY_BRACKETS)
            self.compile_statements()
            self.__eat(RIGHT_CURLY_BRACKETS)
        self._close_tag("ifStatement")

    def compile_while(self):
        """
        Compiles a while statement.
        :return:
        """
        # 'while' '(' expression ')' '{' statements '}'
        self._open_tag("whileStatement")
        self.__eat('while')
        # bracket constants are used here exactly as compile_if uses them
        self.__eat(LEFT_BRACKETS)
        self.compile_expression()
        self.__eat(RIGHT_BRACKETS)
        self.__eat(LEFT_CURLY_BRACKETS)
        self.compile_statements()
        self.__eat(RIGHT_CURLY_BRACKETS)
        self._close_tag("whileStatement")

    def compile_do(self):
        """
        Compiles a do statement.
        :return:
        """
        # 'do' subroutineCall ';'
        self._open_tag("doStatement")
        self.__eat("do")
        self._compile_subroutine_call()
        self.__eat(SEMI_COLON)
        self._close_tag("doStatement")

    def compile_return(self):
        """
        Compiles a return statement.
        :return:
        """
        # 'return' expression? ';'
        self._open_tag("returnStatement")
        self.__eat("return")
        if self._token_value() != SEMI_COLON:
            self.compile_expression()
        self.__eat(SEMI_COLON)
        self._close_tag("returnStatement")

    # ------------------------------------------------------------------
    # expressions
    # ------------------------------------------------------------------
    def compile_expression(self):
        """
        Compiles an expression.
        :return:
        """
        # term (op term)*
        self._open_tag("expression")
        self.compile_term()
        while self._token_value() in Syntax.operators:
            # the operator itself
            self._eat_current()
            self.compile_term()
        self._close_tag("expression")

    def compile_term(self):
        """
        Compiles a term. This routine is faced with a
        slight difficulty when trying to decide
        between some of the alternative parsing rules.
        Specifically, if the current token is an
        identifier, the routine must distinguish
        between a variable, an array entry, and a
        subroutine call. A single look-ahead token,
        which may be one of "[", "(", or "."
        suffices to distinguish between the three
        possibilities. Any other token is not part of
        this term and should not be advanced over.
        :return:
        :raises ValueError: when the current token cannot start a term
        """
        self._open_tag("term")
        header, val = self._token_parts()[:2]
        if header in END_TERMS:
            # stringConstant, integerConstant, keyword
            self._eat_current()
        elif val == LEFT_BRACKETS:
            # '(' expression ')'
            self.__eat(LEFT_BRACKETS)
            self.compile_expression()
            self.__eat(RIGHT_BRACKETS)
        elif val in ONARY_OP:
            # unaryOp term
            self._eat_current()
            self.compile_term()
        elif header == IDENTIFIER:
            lookahead = self.jack_tokens.peek().split()[1]
            if lookahead == LEFT_SQUARE_BRACKETS:
                # varName '[' expression ']'
                self.__eat_by_type(IDENTIFIER)
                self.__eat(LEFT_SQUARE_BRACKETS)
                self.compile_expression()
                self.__eat(RIGHT_SQUARE_BRACKETS)
            elif lookahead == LEFT_BRACKETS or lookahead == ".":
                self._compile_subroutine_call()
            else:
                # plain variable
                self.__eat_by_type(IDENTIFIER)
        else:
            # previously an empty <term> was emitted silently, which hid
            # syntax errors in the input
            raise ValueError(
                "token {!r} cannot start a term".format(self.curr_token))
        self._close_tag("term")

    def compile_expression_list(self):
        """
        Compiles a (possibly empty) comma separated list of expressions.
        :return:
        """
        # (expression (',' expression)*)?
        self._open_tag("expressionList")
        if self._token_value() != RIGHT_BRACKETS:
            self.compile_expression()
            while self._token_value() == COMMA:
                self.__eat(COMMA)
                self.compile_expression()
        self._close_tag("expressionList")

    # ------------------------------------------------------------------
    # token consumption and output
    # ------------------------------------------------------------------
    def __eat(self, param):
        """
        checks that the current token has the expected value, adds it to the
        output, and advances the token pointer
        :param param: the value the current token must have
        :return:
        :raises ValueError: when the current token has a different value
        """
        value = self._token_value()
        if value != param:
            raise ValueError(
                "expected token {!r} but found {!r}".format(param, value))
        self._emit_and_advance()

    def __eat_by_type(self, param):
        """
        checks that the current token has the expected type tag, adds it to
        the output, and advances the token pointer
        :param param: the type tag the current token must have
        :return:
        :raises ValueError: when the current token has a different type
        """
        type_ = self._token_parts()[0]
        if type_ != param:
            raise ValueError(
                "expected a token of type {!r} but found {!r}".format(
                    param, self.curr_token))
        self._emit_and_advance()

    def export_file(self, output_file):
        """
        exports the collected output lines to the given path, one per line
        :param output_file: the path
        :return:
        """
        with open(output_file, "w") as file:
            file.writelines(line + "\n" for line in self.to_output_file)
Exemple #2
0
class CompilationEngine:
    """
    generates the compilers output

    This variant walks the token stream of one Jack class and emits VM
    commands through a VMWriter, using a SymbolTable to resolve variables.
    Every token is a string whose first whitespace-separated field is the
    token type tag and whose second field is the token value.
    """

    def __init__(self, input_file, output_file):
        """
        the constructor of the class; it compiles the whole input
        :param input_file: the jack file that the user want to compile
        :param output_file: the path for the output file, handed to VMWriter
        """
        self.label_count = 0
        self.file_reader = JackFileReader(input_file)
        self.jack_tokens = JackTokenizer(self.file_reader.get_one_liner())
        self.curr_token = self.jack_tokens.advance()
        self.to_output_file = []
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)
        self.class_name = None
        try:
            self.compile_class()
        finally:
            # close the writer even when compilation fails part way through
            self.vm_writer.close()

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _token_parts(self):
        """
        Splits the current token on whitespace.
        :return: a list with at least two items: the type tag and the value
        :raises ValueError: when the input ended or the token is malformed
        """
        if not self.curr_token:
            raise ValueError("unexpected end of input")
        parts = self.curr_token.split()
        if len(parts) < 2:
            raise ValueError(
                "malformed token: {!r}".format(self.curr_token))
        return parts

    def _token_value(self):
        """
        :return: the value (second field) of the current token
        """
        return self._token_parts()[1]

    def _segment_of(self, name):
        """
        Looks up the kind of a variable, mapping "field" to "this".
        :param name: the variable name
        :return: whatever the symbol table reports as the kind, except that
            "field" is replaced by "this"
        """
        kind = self.symbol_table.kind_of(name)
        if kind == "field":
            kind = "this"
        return kind

    def _define_names(self, var_type, var_kind):
        """
        Handles: varName (',' varName)* ';'
        and defines every name in the symbol table.
        :param var_type: the declared type shared by all the names
        :param var_kind: the kind passed to the symbol table
        """
        self.symbol_table.define(self.next_token(), var_type, var_kind)
        while self._token_value() == COMMA:
            # advance passed the comma
            self.next_token()
            self.symbol_table.define(self.next_token(), var_type, var_kind)
        # advance passed ;
        self.next_token()

    def _define_parameter(self):
        """
        Handles: type varName
        and defines the name as an argument in the symbol table.
        """
        par_type = self.next_token()
        par_name = self.next_token()
        self.symbol_table.define(par_name, par_type, ARG)

    # ------------------------------------------------------------------
    # program structure
    # ------------------------------------------------------------------
    def compile_class(self):
        """
        Compiles a complete class.
        """
        # advancing beyond 'class'
        self.next_token()
        self.class_name = self.next_token()
        # advancing beyond '{'
        self.next_token()
        while self._token_value() in VAR_DECS:
            self.compile_class_var_dec()
        while self._token_value() in SUB_ROUTINES:
            self.compile_subroutine_dec()
        # advancing beyond '}'
        self.next_token()

    def compile_class_var_dec(self):
        """
        Compiles a static declaration or a field declaration.
        :return:
        """
        # ('static' | 'field' ) type varName (',' varName)* ';'
        var_kind = self.next_token()
        var_type = self.next_token()
        self._define_names(var_type, var_kind)

    def compile_subroutine_dec(self):
        """
        Compiles a complete method, function, or constructor.
        :return:
        """
        self.symbol_table.start_subroutine()
        # constructor \ function \ method
        subroutine_type = self.next_token()
        # advance the return type
        self.next_token()
        subroutine_name = self.class_name + "." + self.next_token()
        # advance the left brackets
        self.next_token()
        if subroutine_type == "method":
            # defined before the declared parameters, so it is the first
            # argument entry
            self.symbol_table.define("this", self.class_name, ARG)
        self.compile_parameters_list()
        self.vm_writer.write_function(subroutine_name, self.count_var_dec())

        if subroutine_type == "constructor":
            # allocate one word per field and anchor "this" on the block
            self.vm_writer.write_push("constant",
                                      self.get_num_of_field_vars())
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("pointer", 0)
        elif subroutine_type == "method":
            # anchor "this" on the object passed as argument 0
            self.vm_writer.write_push("argument", 0)
            self.vm_writer.write_pop("pointer", 0)

        # advance the right brackets
        self.next_token()
        self.compile_subroutine_body()

    def get_num_of_field_vars(self):
        """
        counts the class-level symbols whose kind is "field"
        :return: the number of field variables
        """
        return sum(
            1 for var in self.symbol_table.class_symbol_table.values()
            if var[1] == "field")

    def compile_parameters_list(self):
        """
        Compiles a (possibly empty) parameter list, not including the
        enclosing ().
        :return: the number of parameters
        """
        num_of_par = 0
        if self._token_value() != RIGHT_BRACKETS:
            self._define_parameter()
            num_of_par += 1
            while self._token_value() == COMMA:
                # advance pass the comma
                self.next_token()
                self._define_parameter()
                num_of_par += 1
        return num_of_par

    def count_var_dec(self):
        """
        counts the number of variables the subroutine has, by looking ahead
        in the tokenizer's token list without consuming anything
        :return: the number of declared local variables
        """
        tokens = self.jack_tokens.list_of_tokens
        var_count = 0
        # NOTE(review): relies on the tokenizer's index attribute; the +1
        # presumably lands on the first token inside the body - confirm
        # against JackTokenizer.
        position = self.jack_tokens.curr_token + 1
        # 'var' type varName (',' varName)* ';'
        while tokens[position].split()[1] == "var":
            var_count += 1
            # skip var type varName
            position += 3
            while tokens[position].split()[1] == COMMA:
                var_count += 1
                # skip the comma and the following varName
                position += 2
            # advance passed ;
            position += 1
        return var_count

    def compile_subroutine_body(self):
        """
        compiles the subroutine body
        :return:
        """
        # pass the left curly brackets
        self.next_token()
        while self._token_value() == "var":
            self.compile_var_dec()
        self.compile_statements()
        # pass the right curly brackets
        self.next_token()

    def compile_var_dec(self):
        """
        Compiles a var declaration.
        :return:
        """
        # advance passed "var"
        self.next_token()
        var_type = self.next_token()
        self._define_names(var_type, LCL)

    # ------------------------------------------------------------------
    # statements
    # ------------------------------------------------------------------
    def compile_statements(self):
        """
        Compiles a sequence of statements. Stops at the first token that
        does not start a statement.
        :return:
        """
        handlers = {
            "let": self.compile_let,
            "if": self.compile_if,
            "while": self.compile_while,
            "do": self.compile_do,
            "return": self.compile_return,
        }
        while True:
            handler = handlers.get(self._token_value())
            if handler is None:
                break
            handler()

    def compile_let(self):
        """
        Compiles a let statement.
        :return:
        """
        # advances passed let
        self.next_token()
        var_name = self.next_token()
        var_kind = self._segment_of(var_name)
        var_index = self.symbol_table.index_of(var_name)
        # for varName[] case
        is_array_entry = self._token_value() == LEFT_SQUARE_BRACKETS
        if is_array_entry:
            # advance brackets
            self.next_token()
            self.compile_expression()
            # target address = base + index, left on the stack
            self.vm_writer.write_push(var_kind, var_index)
            self.vm_writer.write_arithmetic("+")
            # advance brackets
            self.next_token()

        # advance equal sign
        self.next_token()
        self.compile_expression()
        if is_array_entry:
            # park the value, point "that" at the target, then store it
            self.vm_writer.write_pop("temp", 0)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("temp", 0)
            self.vm_writer.write_pop("that", 0)
        else:
            self.vm_writer.write_pop(var_kind, var_index)

        # advance semi colon
        self.next_token()

    def compile_if(self):
        """
        Compiles a if statement, including the optional else clause.
        :return:
        """
        # advance the if
        self.next_token()
        # advance the left brackets
        self.next_token()
        self.compile_expression()
        true_label = self.next_label()
        self.vm_writer.write_if(true_label)
        false_label = self.next_label()
        self.vm_writer.write_goto(false_label)
        self.vm_writer.write_label(true_label)
        # advance the right brackets
        self.next_token()
        # advance the left curly brackets
        self.next_token()
        self.compile_statements()
        # advance the right curly brackets
        self.next_token()

        if self._token_value() == "else":
            end_label = self.next_label()
            # the true branch jumps over the else branch
            self.vm_writer.write_goto(end_label)
            self.vm_writer.write_label(false_label)
            # advance the else
            self.next_token()
            # advance the left curly brackets
            self.next_token()
            self.compile_statements()
            # advance the right curly brackets
            self.next_token()
            self.vm_writer.write_label(end_label)
        else:
            self.vm_writer.write_label(false_label)

    def compile_while(self):
        """
        Compiles a while statement.
        :return:
        """
        # advance the while
        self.next_token()
        # advance the left brackets
        self.next_token()
        loop_label = self.next_label()
        self.vm_writer.write_label(loop_label)
        self.compile_expression()
        # leave the loop when the condition is false
        self.vm_writer.write_arithmetic(NOT)
        exit_label = self.next_label()
        self.vm_writer.write_if(exit_label)
        # advance the right brackets
        self.next_token()
        # advance the left curly brackets
        self.next_token()
        self.compile_statements()
        self.vm_writer.write_goto(loop_label)
        self.vm_writer.write_label(exit_label)
        # advance the right curly brackets
        self.next_token()

    def compile_subroutine_call(self):
        """
        Compiles a subroutine call:
        subroutineName(expressionList) or
        (className|varName).subroutineName(expressionList)

        A bare call is compiled as a call on the current object. A dotted
        call pushes the receiver only when the symbol table knows the name
        before the dot.
        :return:
        """
        first_name = self.next_token()
        num_of_arguments = 0
        if self._token_value() == ".":
            # advance the dot
            self.next_token()
            kind = self._segment_of(first_name)
            index = self.symbol_table.index_of(first_name)
            type_ = self.symbol_table.type_of(first_name)
            # a known variable is replaced by its type in the call target
            qualifier = type_ if type_ else first_name
            subroutine_name = qualifier + "." + self.next_token()
            # advance the brackets
            self.next_token()
            if kind is not None and index is not None:
                self.vm_writer.write_push(kind, index)
                num_of_arguments = 1
        else:
            # the symbol table is deliberately not consulted here: a
            # variable that merely shares the subroutine's name must not be
            # pushed as an extra argument
            subroutine_name = self.class_name + "." + first_name
            # advance the brackets
            self.next_token()
            self.vm_writer.write_push("pointer", 0)
            num_of_arguments = 1
        num_of_arguments += self.compile_expression_list()
        # advance the brackets
        self.next_token()

        self.vm_writer.write_call(subroutine_name, num_of_arguments)

    def compile_do(self):
        """
        Compiles a do statement.
        :return:
        """
        # advance the do
        self.next_token()
        self.compile_subroutine_call()
        # the returned value is not used by a do statement
        self.vm_writer.write_pop("temp", 0)
        # advance the semi colon
        self.next_token()

    def compile_return(self):
        """
        Compiles a return statement. When no expression is given, constant 0
        is pushed as the return value.
        :return:
        """
        # advance the return
        self.next_token()
        if self._token_value() != SEMI_COLON:
            # "this" needs no special case: compile_term pushes pointer 0
            self.compile_expression()
        else:
            # default
            self.vm_writer.write_push("constant", 0)
        self.vm_writer.write_return()
        # advance the semi colon
        self.next_token()

    # ------------------------------------------------------------------
    # expressions
    # ------------------------------------------------------------------
    def compile_expression(self):
        """
        Compiles an expression. Operators are applied left to right, each
        one after its right-hand term has been compiled.
        :return:
        """
        self.compile_term()
        while self._token_value() in Syntax.operators:
            op = self.next_token()
            self.compile_term()
            self.compile_op(op)

    def compile_op(self, op):
        """
        Writes the VM code of a binary operator.
        :param op: the operator token value
        :return:
        """
        if op == "*":
            self.vm_writer.write_call("Math.multiply", 2)
        elif op == "/":
            self.vm_writer.write_call("Math.divide", 2)
        else:
            self.vm_writer.write_arithmetic(op)

    def compile_term(self):
        """
        Compiles a term. This routine is faced with a slight difficulty when
        trying to decide between some of the alternative parsing rules.
        Specifically, if the current token is an identifier, the routine
        must distinguish between a variable, an array entry, and a
        subroutine call. A single look-ahead token, which may be one of
        [, (, or . suffices to distinguish between the three possibilities.
        Any other token is not part of this term and should not be advanced
        over.
        :return:
        """
        header, val = self._token_parts()[:2]
        # Constants are recognised by their type tag BEFORE any check on the
        # value: the value of a string constant is only its first word, so a
        # string such as "( x" or "-" used to be mistaken for a parenthesis
        # or a unary operator.
        if header == "<integerConstant>":
            self.vm_writer.write_push("constant", val)
            self.next_token()
        elif header == "<stringConstant>":
            # strip the "<stringConstant> " prefix (17 characters) and the
            # " </stringConstant>" suffix (18 characters)
            the_string = self.curr_token[17:-18]
            self.vm_writer.write_push("constant", len(the_string))
            self.vm_writer.write_call("String.new", 1)
            for char in the_string:
                self.vm_writer.write_push("constant", ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.next_token()
        elif header == "<keyword>":
            if val == "this":
                self.vm_writer.write_push("pointer", 0)
            else:
                # every other keyword is 0; "true" is 0 negated
                self.vm_writer.write_push("constant", 0)
                if val == "true":
                    self.vm_writer.write_arithmetic(NOT)
            self.next_token()
        elif val == LEFT_BRACKETS:
            # advance passed "("
            self.next_token()
            self.compile_expression()
            # advance passed ")"
            self.next_token()
        elif val in ONARY_OP:
            self.next_token()
            self.compile_term()
            self.vm_writer.write_arithmetic(NEG if val == "-" else NOT)
        elif header == IDENTIFIER:
            lookahead = self.jack_tokens.peek().split()[1]
            if lookahead == LEFT_SQUARE_BRACKETS:
                # skip name and "["
                self.next_token()
                self.next_token()
                self.compile_expression()
                # address = base + index, then read through "that"
                self.vm_writer.write_push(self._segment_of(val),
                                          self.symbol_table.index_of(val))
                self.vm_writer.write_arithmetic("+")
                # skip over "]"
                self.next_token()
                self.vm_writer.write_pop("pointer", 1)
                self.vm_writer.write_push("that", 0)
            elif lookahead == LEFT_BRACKETS or lookahead == ".":
                self.compile_subroutine_call()
            else:
                # plain variable
                self.vm_writer.write_push(self._segment_of(val),
                                          self.symbol_table.index_of(val))
                self.next_token()

    def compile_expression_list(self):
        """
        Compiles a (possibly empty) comma separated list of expressions.
        :return: the number of expressions compiled
        """
        num_of_arguments = 0
        if self._token_value() != RIGHT_BRACKETS:
            self.compile_expression()
            num_of_arguments += 1
            while self._token_value() == COMMA:
                # advance comma
                self.next_token()
                self.compile_expression()
                num_of_arguments += 1
        return num_of_arguments

    # ------------------------------------------------------------------
    # token and label bookkeeping
    # ------------------------------------------------------------------
    def next_token(self):
        """
        Advances to the next token.
        :return: the value of the token that was current before advancing
        """
        to_return = self._token_value()
        self.curr_token = self.jack_tokens.advance()
        return to_return

    def next_label(self):
        """
        :return: a new label name, "LABEL" followed by a running counter
        """
        count = self.label_count
        self.label_count += 1
        return "LABEL" + str(count)
Exemple #3
0
class CompilationEngine():
    """Jack compilation engine that reads tokens from a JackTokenizer and
    emits VM commands through a VMWriter."""
    # keywords that start a class-level variable declaration
    vardeclist = ['static', 'field']
    # keywords related to subroutine declarations
    subroutinelist = ['constructor', 'function', 'method', 'void']
    # statement keywords
    # NOTE(review): 'turn' is not a Jack keyword; looks like a leftover - confirm
    statementList = ['let', 'while', 'if', 'return', 'do', 'turn']
    # operator symbol (raw and XML-escaped spellings) -> VM arithmetic command
    op_dict = {
        '+': 'add',
        '-': 'sub',
        '&': 'and',
        '|': 'or',
        '<': 'lt',
        '>': 'gt',
        '=': 'eq',
        '~': 'not',
        '&amp;': 'and',
        '&lt;': 'lt',
        '&gt;': 'gt'
    }
    # class-level defaults for the counters; instances shadow them on first
    # assignment (e.g. ``self.while_label_index += 1``)
    running_index = 0
    label_index = 0
    while_label_index = 0
    if_label_index = 0

    def __init__(self, inputfile, writefile):
        """Set up the compiler state and immediately compile the class.

        :param inputfile: path of the .jack source; the VM output path is the
            same path with '.jack' replaced by '.vm'
        :param writefile: path opened in 'w+' mode and kept in ``self.outfile``
        """
        # parser flags
        self.op_flag1 = self.op_flag2 = self.is_Array = False
        # names tracked while walking the class
        self.ClassName = self.keyword = ''
        self.cur_subroutineName = self.sub_return_type = self.array_name = ''
        self.running_index = 0

        # collaborators
        self.infile = inputfile
        self.Tokenizer = JackTokenizer(inputfile)
        self.SymbolTable = SymbolTable()
        vm_path = inputfile.replace('.jack', '.vm')
        self.VMWriter = VMWriter(vm_path)
        self.outfile = open(writefile, 'w+')

        self.compileClass()

    def compileClass(self):  #draft finished
        print('Compiling started of ' + self.infile)
        self.Tokenizer.advance()  #starts it, token = 'class'
        if 'class' in self.Tokenizer.getToken():

            self.Tokenizer.advance()  #token = classname
            self.ClassName = self.Tokenizer.getStrToken()
            self.Tokenizer.advance()  #next token = {

            #cleaned up from project 10 put while loop in each respective method
            self.compileClassVarDec()
            self.compileSubroutine()

            self.Tokenizer.advance()  #next token
        else:
            print('error: NO CLASS TO COMPILE')

    def compileClassVarDec(
            self):  #finished untested ######WHILE LOOP NOT TURNING ON
        self.Tokenizer.advance()  #adv0ance off { to first token in var dec

        while 'static' in self.Tokenizer.getStrToken(
        ) or 'field' in self.Tokenizer.getStrToken():

            kind = self.Tokenizer.getStrToken()  #static or
            self.Tokenizer.advance()  #next token advance to type
            type = self.Tokenizer.getStrToken()
            self.Tokenizer.advance()  #next token advance to name
            name = self.Tokenizer.getStrToken()
            self.Tokenizer.advance()  #advances to either , or ;

            self.SymbolTable.define(name, type, kind)

            while ';' not in self.Tokenizer.getToken(
            ):  #while you havent reached the end of the line
                self.Tokenizer.advance(
                )  #advances to next token which is hopefully a name
                name = self.Tokenizer.getStrToken()
                self.SymbolTable.define(name, type, kind)

                self.Tokenizer.advance()  #advances to either , or ;

            #have reached ;
            self.Tokenizer.advance()  #next token

    def compileSubroutine(self):  #draft finished
        #contains subroutine DEc and subroutine Body
        #something f****d up here
        #return_is_void=False
        while 'constructor' in self.Tokenizer.getStrToken(
        ) or 'method' in self.Tokenizer.getStrToken(
        ) or 'function' in self.Tokenizer.getStrToken():
            constructor_flag = False
            method_flag = False
            self.SymbolTable.startSubroutine()
            if 'constructor' in self.Tokenizer.getStrToken():
                constructor_flag = True
            if 'method' in self.Tokenizer.getStrToken():
                method_flag = True
            #current token right now should be either constructor, method, or function
            self.Tokenizer.advance()  #advances to type of return eg int
            self.sub_return_type = self.Tokenizer.getStrToken()
            self.Tokenizer.advance(
            )  #next token advances to subroutine name (should be new if constructor)
            self.cur_subroutineName = self.ClassName + '.' + self.Tokenizer.getStrToken(
            )
            if 'method' in self.Tokenizer.getStrToken():
                self.SymbolTable.define('this', self.sub_return_type,
                                        'argument')
            self.Tokenizer.advance()  #next token advances to (
            self.compileParameterList()
            #after parameter list finishes token is )
            self.Tokenizer.advance()  #next token advances to {

            #starts subroutine body
            self.Tokenizer.advance(
            )  #advances to start of subroutine body First token there
            #moved while loop below into compileVardec
            #while 'var' in self.Tokenizer.getToken():
            self.compileVarDec()

            self.VMWriter.writeFunction(
                self.cur_subroutineName, self.SymbolTable.varCount('var')
            )  #calls function related to parent class, if its a function all good. if its method or constructor more has to happen
            self.if_label_index = 0
            if constructor_flag == True:
                self.VMWriter.writePush(
                    'constant', self.SymbolTable.varCount('field')
                )  #pushes the constructors fields onto stack for however many fields there are
                self.VMWriter.writeCall('Memory.alloc', 1)
                self.VMWriter.writePop('pointer', 0)
            elif method_flag == True:
                self.VMWriter.writePush('argument', 0)
                self.VMWriter.writePop('pointer', 0)  #init this

            #compile the rest of subroutine
            self.compileStatements()
            #current token after should be }
            self.Tokenizer.advance(
            )  #next statement constructor and method function if not breaks out of while loop

    def compileParameterList(self):  #draft finished

        while ')' not in self.Tokenizer.getToken():
            self.Tokenizer.advance(
            )  #advance to type of parameter or ) in case of no param
            if ')' not in self.Tokenizer.getStrToken(
            ):  #parameter here to parameter stuff
                if ',' not in self.Tokenizer.getStrToken():
                    type = self.Tokenizer.getStrToken()
                    self.Tokenizer.advance()  #advances to name
                    name = self.Tokenizer.getStrToken()
                    self.Tokenizer.advance(
                    )  #advances to either comma or end of list start loop over

                    self.SymbolTable.define(name, type, 'argument')
            else:
                return  # ')' is token, function is done

    def compileVarDec(self):  #draft

        while 'var' in self.Tokenizer.getToken():

            if 'var' in self.Tokenizer.getToken():
                self.Tokenizer.advance()  #advances to type
                type = self.Tokenizer.getStrToken()
                self.Tokenizer.advance()  #advances to name
                name = self.Tokenizer.getStrToken()
                self.SymbolTable.define(name, type,
                                        'var')  #appends to symbol table
                self.Tokenizer.advance()  #advances to either ,  or ;

                while ';' not in self.Tokenizer.getToken():
                    self.Tokenizer.advance()  #advances to name
                    name = self.Tokenizer.getStrToken()
                    self.SymbolTable.define(name, type, 'var')
                    self.Tokenizer.advance(
                    )  #next token advances to either , or ;

                self.Tokenizer.advance(
                )  #next token var if more vars or done if not

    def compileStatements(self):  #finished

        while 'if' or 'let' or 'while' or 'do' or 'return' in self.Tokenizer.getStrToken(
        ):

            if 'let' in self.Tokenizer.getToken():
                self.compileLet()

            elif 'while' in self.Tokenizer.getToken():
                self.compileWhile()

            elif 'if' in self.Tokenizer.getToken():
                self.compileIf()

            elif 'do' in self.Tokenizer.getToken():
                self.compileDo()

            elif 'return' in self.Tokenizer.getToken():

                self.compileReturn()
            else:

                break

    def compileCall(self):  #finished untested
        """Compile a subroutine call, starting on its first identifier.

        The token after the identifier selects the shape:

        * ``name.sub(...)`` with ``name`` found in ``SubSymbolTable``: call on
          a subroutine-scope variable, target is ``typeOf(name).sub``;
        * ``name.sub(...)`` otherwise: target is ``typeOf(name).sub`` when
          ``name`` is in ``ClassSymbolTable``, else ``name.sub``;
        * ``sub(...)``: call on the current object, target is
          ``ClassName.sub``.

        The argument count is taken from ``self.running_index`` after
        ``compileExpressionList`` has run, and the counter is reset to 0.
        The caller is responsible for discarding the return value.
        """
        #do without pop temp 0
        #next token to function/method name
        doCallName = self.Tokenizer.getStrToken()

        self.Tokenizer.advance()  #advances to '.' or '('
        if '.' in self.Tokenizer.getToken():
            if doCallName in self.SymbolTable.SubSymbolTable:  #ie method like square.move()
                self.Tokenizer.advance()
                subName = self.Tokenizer.getStrToken()
                self.Tokenizer.advance()  #next token '('
                self.Tokenizer.advance()  #first expression
                self.compileExpressionList()

                nArgs = self.running_index  #running index added whenever expression called in expression list +1 for this
                self.running_index = 0  #resets index
                # NOTE(review): the object is pushed after the argument
                # expressions were compiled - confirm the callee expects
                # the receiver in that position
                self.VMWriter.writePush(
                    self.SymbolTable.kindOf(doCallName),
                    self.SymbolTable.indexOf(
                        doCallName))  #pushes 'this' of object onto stack

                self.VMWriter.writeCall(
                    self.SymbolTable.typeOf(doCallName) + '.' + subName, nArgs)
                # only this branch returns; the branch below falls through
                # to the '(' check at the end of the method
                return

            else:  #ie function like Keyboard.keyPressed() Sys.wait(5) basically same thing except you dont push this on stack

                self.Tokenizer.advance()
                subName = self.Tokenizer.getStrToken()
                self.Tokenizer.advance()  #next token '('
                self.Tokenizer.advance()
                self.compileExpressionList()
                nArgs = self.running_index  #running index added whenever expression called in expression list
                #resets index
                self.running_index = 0
                if doCallName in self.SymbolTable.ClassSymbolTable:
                    # NOTE(review): the pushed index is nArgs - 1, not
                    # indexOf(doCallName) - confirm this is intended
                    self.VMWriter.writePush('this', nArgs - 1)
                    self.VMWriter.writeCall(
                        self.SymbolTable.typeOf(doCallName) + '.' + subName,
                        nArgs)
                else:
                    # 'Keyboard' calls are emitted with one argument fewer
                    # than counted; every other name uses the count as is
                    if doCallName == 'Keyboard':
                        self.VMWriter.writeCall(doCallName + '.' + subName,
                                                nArgs - 1)
                    else:
                        self.VMWriter.writeCall(doCallName + '.' + subName,
                                                nArgs)

        # also evaluated after the non-returning '.' branch above, against
        # whatever token is current at that point
        if '(' in self.Tokenizer.getToken():  #method call like do clear()
            self.Tokenizer.advance()  #token is first token in expression list
            self.compileExpressionList()
            nArgs = self.running_index
            self.running_index = 0
            self.VMWriter.writePush('pointer', 0)  #pushes this

            self.VMWriter.writeCall(self.ClassName + '.' + doCallName, nArgs)

    def compileDo(self):
        self.Tokenizer.advance()
        self.compileCall()
        self.Tokenizer.advance()
        self.Tokenizer.advance()
        self.VMWriter.writePop('temp', 0)

    #dont think this works for arrays like a[b[c[5]] yet NOT CONFIDENT ON THIS METHOD

    def compileLet(self):
        #current token is let
        #doesnt work for a[i]
        self.Tokenizer.advance()  #now token = varname
        varName = self.Tokenizer.getStrToken()
        self.array_name = self.Tokenizer.getStrToken()
        self.Tokenizer.advance()  # = or [

        if '[' in self.Tokenizer.getStrToken(
        ):  #array, see section 11.1.1 Array Handling for help or unit 5.8 video
            self.is_array = True
            self.compileArrayExp()  #compile arrary term

            self.compileExpression()
            self.VMWriter.writePop('temp', 0)
            self.VMWriter.writePop('pointer', 1)
            self.VMWriter.writePush('temp', 0)
            self.VMWriter.writePop('that', 0)

        else:
            self.Tokenizer.advance()  #expression

            self.compileExpression()  #eg(5*(3+4))

            self.VMWriter.writePop(
                self.SymbolTable.kindOf(varName),
                self.SymbolTable.indexOf(varName))  #pop expression t
        self.Tokenizer.advance()
        if ';' in self.Tokenizer.getStrToken():
            self.Tokenizer.advance()

    def compileWhile(
            self):  #draft I might have the labeling wrong or something
        #at start token = while
        whileLabel = 'WHILE_EXP' + str(self.while_label_index)
        self.VMWriter.writeLabel(whileLabel)
        self.Tokenizer.advance()  #advances to (
        self.compileExpression()  #writes expression
        self.VMWriter.writeArithmetic('not')
        self.VMWriter.writeIf('WHILE_END' + str(self.while_label_index))
        while_end_index = self.while_label_index
        self.while_label_index += 1
        if '{' in self.Tokenizer.getStrToken():
            self.Tokenizer.advance()

        self.compileStatements()  #writes statements

        self.VMWriter.writeGoto(whileLabel)
        self.VMWriter.writeLabel('WHILE_END' + str(while_end_index))

        self.Tokenizer.advance()  #advance to after } ie done with this while

    def compileReturn(self):  #draft

        #starts at token = return
        self.Tokenizer.advance()
        if ';' not in self.Tokenizer.getToken():
            self.compileExpression()
            self.Tokenizer.advance()  #advance past ;
        else:  #placeholder return
            self.VMWriter.writePush('constant', 0)
            self.Tokenizer.advance()
        self.VMWriter.writeReturn()
        self.op_flag2 = False
        if ';' in self.Tokenizer.getStrToken():
            self.Tokenizer.advance()

    def compileIf(self):  #finished sort of the same as while
        #start token = if

        ifLabel = 'IF_TRUE' + str(self.if_label_index)

        self.compileExpression()

        self.VMWriter.writeIf(ifLabel)
        self.VMWriter.writeGoto('IF_FALSE' + str(self.if_label_index))
        self.VMWriter.writeLabel(ifLabel)
        else_index = self.if_label_index
        self.if_label_index += 1
        if '{' in self.Tokenizer.getStrToken():
            self.Tokenizer.advance()

        self.compileStatements()
        self.Tokenizer.advance()

        if 'else' in self.Tokenizer.getToken():
            self.VMWriter.writeGoto('IF_END' + str(else_index))
            self.VMWriter.writeLabel('IF_FALSE' + str(else_index))
            self.Tokenizer.advance()  #advances to {
            self.Tokenizer.advance()  #advances to statements
            self.compileStatements()
            self.Tokenizer.advance()
            self.VMWriter.writeLabel('IF_END' + str(else_index))
            if '}' in self.Tokenizer.peek():
                self.Tokenizer.advance()
        else:
            self.VMWriter.writeLabel('IF_FALSE' + str(else_index))

        #self.if_label_index

    def compileExpression(self):  #draft
        """Compile a term and, when an operator follows, its right-hand side.

        After the first term the tokenizer is advanced once. If the token now
        current is an operator (``Tokenizer.isOp()``), the operator checks
        below run one after another; each matching check advances, compiles
        one more term and emits the operator's VM code. ``op_flag1`` records
        whether an operator was seen; ``op_flag2`` is cleared after the first
        term and set again by the '+' branch.
        """

        self.compileTerm()
        self.op_flag2 = False

        self.Tokenizer.advance()

        if self.Tokenizer.isOp():
            # The checks are independent `if`s, not an `elif` chain: once one
            # branch has consumed its term, the later checks test whatever
            # token is current at that point.
            if '*' in self.Tokenizer.getStrToken():

                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeCall('Math.multiply', 2)
            if '/' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeCall('Math.divide', 2)
                # only the '/' branch advances once more after its term
                self.Tokenizer.advance()

            if '+' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('add')
                self.op_flag2 = True

            # comparison/logic operators are matched by the XML-escaped
            # spellings '&gt', '&amp' and '&lt'
            if '&gt' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('gt')

            if '&amp' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('and')

            if '|' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('or')
            if '&lt' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('lt')
            if '=' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('eq')
            if '-' in self.Tokenizer.getStrToken():
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('sub')

            self.op_flag1 = True

        else:

            self.op_flag1 = False
        # original author's note: "ie two ops in a row so write this"

    def compileTerm(
            self):  # marked unfinished by the original author
        """Compile a single term of an expression.

        Branches, in order:

        * next token contains '[': array element read, returns early;
        * integer constant, string constant, ``this``, ``true``, ``false``;
        * unary '-' / '~' followed by a term;
        * '(' expression ')';
        * a name found in ``SubSymbolTable`` or ``ClassSymbolTable``.

        After that chain a second, independent ``if``/``else`` runs: when the
        current token contains '[' the array-of-array handling executes,
        otherwise the tokens ahead are checked for a '.' call or a '('.
        """

        if '[' in self.Tokenizer.peek():
            self.array_name = self.Tokenizer.getStrToken()
            self.Tokenizer.advance()  #advances to [
            self.Tokenizer.advance()  #advances to inner term
            # push the index token's variable, then the array variable
            self.VMWriter.writePush(
                self.SymbolTable.kindOf(self.Tokenizer.getStrToken()),
                self.SymbolTable.indexOf(self.Tokenizer.getStrToken()))
            self.VMWriter.writePush(self.SymbolTable.kindOf(self.array_name),
                                    self.SymbolTable.indexOf(self.array_name))
            self.Tokenizer.advance()  #advances to another term or ]

            if ']' not in self.Tokenizer.getStrToken():
                self.array_name = self.Tokenizer.getStrToken()
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('add')
                self.VMWriter.writePush('temp', 0)
                self.VMWriter.writePop('pointer', 1)
                self.VMWriter.writePush('temp', 0)
                self.VMWriter.writePop('that', 0)
            if ']' in self.Tokenizer.getStrToken():

                # address = index + base, then read through `that 0`
                self.VMWriter.writeArithmetic('add')
                self.VMWriter.writePop('pointer', 1)

                self.VMWriter.writePush('that', 0)
                self.Tokenizer.advance()
            return

        if 'integerConstant' in self.Tokenizer.getToken():
            # the constant is pushed only while op_flag2 is clear
            if self.op_flag2 == False:
                self.VMWriter.writePush('constant',
                                        self.Tokenizer.getStrToken())

        elif 'stringConstant' in self.Tokenizer.getToken(
        ):  #forums explanation here: http://nand2tetris-questions-and-answers-forum.32033.n3.nabble.com/Project-11-gt-Strings-calling-string-constructor-td4030992.html#a4030993
            # strip the XML wrapper, then build the string one char at a time
            string = self.Tokenizer.getToken()
            string = string.replace('<stringConstant> ', '')
            string = string.replace(' </stringConstant>', '')

            self.VMWriter.writePush('constant', len(string))
            self.VMWriter.writeCall('String.new', 1)
            for char in string:

                self.VMWriter.writePush('constant', ord(char))
                self.VMWriter.writeCall('String.appendChar', 2)

        elif 'this' in self.Tokenizer.getStrToken():
            self.VMWriter.writePush('pointer', 0)

        elif 'true' in self.Tokenizer.getStrToken():
            # true is emitted as not(0)
            self.VMWriter.writePush('constant', 0)
            self.VMWriter.writeArithmetic('not')

        elif 'false' in self.Tokenizer.getStrToken():
            self.VMWriter.writePush('constant', 0)

        elif '-' in self.Tokenizer.getToken():
            # unary minus
            self.Tokenizer.advance()
            self.compileTerm()
            self.VMWriter.writeArithmetic('neg')

        elif '~' in self.Tokenizer.getToken():
            self.Tokenizer.advance()
            self.compileTerm()
            self.VMWriter.writeArithmetic('not')
        elif '(' in self.Tokenizer.getToken():
            self.Tokenizer.advance()  #advances to expression of off (
            self.compileExpression()
            self.Tokenizer.advance()  #advances off of )

        elif self.Tokenizer.getStrToken() in self.SymbolTable.SubSymbolTable:

            self.VMWriter.writePush(
                self.SymbolTable.kindOf(self.Tokenizer.getStrToken()),
                self.SymbolTable.indexOf(self.Tokenizer.getStrToken()))
        elif self.Tokenizer.getStrToken() in self.SymbolTable.ClassSymbolTable:
            # class-scope names are always pushed from the `this` segment
            if self.op_flag2 == False:
                self.VMWriter.writePush(
                    'this',
                    self.SymbolTable.indexOf(self.Tokenizer.getStrToken()))
        # independent of the chain above: this `if` always runs
        if '[' in self.Tokenizer.getStrToken(
        ):  #recursion here for multiple arrays of arrays

            self.Tokenizer.advance()  #advance off of [ and on to val
            self.VMWriter.writePush(
                self.SymbolTable.kindOf(self.Tokenizer.getStrToken()),
                self.SymbolTable.indexOf(self.Tokenizer.getStrToken()))
            self.VMWriter.writePush(self.SymbolTable.kindOf(self.array_name),
                                    self.SymbolTable.indexOf(self.array_name))
            self.Tokenizer.advance()  #advances to another term or ]

            if ']' not in self.Tokenizer.getStrToken():
                self.array_name = self.Tokenizer.getStrToken()
                self.Tokenizer.advance()
                self.compileTerm()
                self.VMWriter.writeArithmetic('add')
                self.VMWriter.writePush('temp', 0)
                self.VMWriter.writePop('pointer', 1)
                self.VMWriter.writePush('temp', 0)
                self.VMWriter.writePop('that', 0)
            if ']' in self.Tokenizer.getStrToken():
                self.VMWriter.writeArithmetic('add')
                self.Tokenizer.advance()

            # NOTE(review): debug output left in the compile path
            print('1: ' + self.Tokenizer.getStrToken() + self.Tokenizer.peek())
        else:  #var dec makes it harder come back to later
            #very possible something could be wrong here
            if '.' in self.Tokenizer.peek():

                self.compileCall()

            if '(' in self.Tokenizer.peek():
                # NOTE(review): `getStrToken` is passed uncalled (no
                # parentheses), so kindOf receives the bound method itself;
                # the branch body is a no-op either way
                if self.SymbolTable.kindOf(
                        self.Tokenizer.getStrToken) != 'NONE':
                    #self.VMWriter.writePush(self.SymbolTable.kindOf(self.Tokenizer.getStrToken),self.SymbolTable.indexOf(self.Tokenizer.getStrToken))
                    pass
                self.Tokenizer.advance()
                self.Tokenizer.advance()
                self.compileExpression()
                self.Tokenizer.advance()

    def compileArrayExp(self):
        self.compileTerm()
        self.Tokenizer.advance()

    def compileExpressionList(
            self):  #calls compile expression until ; then say im done

        self.running_index = 1
        while ';' not in self.Tokenizer.peek(
        ):  #and ';' not in self.Tokenizer.getStrToken() :
            if '(' in self.Tokenizer.getStrToken(
            ) and ')' in self.Tokenizer.peek():
                return
            if ',' in self.Tokenizer.getToken():
                self.Tokenizer.advance()
                self.running_index += 1

            else:
                self.compileExpression()

        self.op_flag2 = False

        #token at end is )

    def close(self):
        self.outfile.close()
class CompilationEngine(object):
    
    def __init__(self, inStr):
        """Create the engine and load the first token.

        :param inStr: value handed unchanged to ``JackTokenizer``
        """
        self.xml = ''  # XML output accumulated by the compile* methods
        tokenizer = JackTokenizer(inStr)
        self.jackFile = tokenizer
        tokenizer.advance()

    def compileClass(self):
        # check the first token, return if wrong
        if self.jackFile.token != 'class':
            print("first token needs to be 'class'")
            return ''
        
        self.xml += '<class>'       # open class tag
        self.writeAdv()             # write class keyword   
        self.writeAdv()             # write class name tag
        self.writeAdv()             # write '{'

        # look for variable declarations 
        while (self.jackFile.token == 'static' or self.jackFile.token == 'field') and \
        self.jackFile.tokenType == 'keyword':
            self.compileClassVarDec()

        # look for subroutine declarations 
        while (self.jackFile.token == 'method' or self.jackFile.token == 'function' or \
            self.jackFile.token == 'constructor') and self.jackFile.tokenType == 'keyword':
            self.compileSubroutine()

        # look for '}'
        while not (self.jackFile.token == '}' and self.jackFile.tokenType == 'symbol'):
            self.writeAdv("Expected '}'")
        self.writeAdv()

        # '}' has been hit, end of file. close class tag and return xml string
        if self.jackFile.hasMoreTokens(): print("There is uncompiled code after the class")
        self.xml += '\n</class>'
        return self.xml

    def compileClassVarDec(self):
        self.xml += '\n<classVarDec>'     # open classVarDec tag 
        # loop through until ';' 
        while not (self.jackFile.token == ';' and self.jackFile.tokenType == 'symbol'):
            self.writeAdv()
        self.writeAdv()                 # write ';'
        self.xml += '\n</classVarDec>'    # close classVarDec tag
    
    def compileSubroutine(self):
        self.xml += '\n<subroutineDec>'   # open subroutineDec tag
        self.writeAdv()                 # write sub type
        self.writeAdv()                 # write sub return type
        self.writeAdv()                 # write sub name
        
        self.writeAdv()                 # write '('
        self.xml += '\n<parameterList>'   # open parameterList tag
        self.compileParameterList()     # writes the potentially empty parameter list
        self.xml += '\n</parameterList>'  # close parameterList tag
        self.writeAdv()                 # write ')'

        self.xml += '\n<subroutineBody>'  # open subroutineBody tag
        self.writeAdv()                 # write '{'
        
        # look for variable declarations 
        while (self.jackFile.token == 'var') and self.jackFile.tokenType == 'keyword':
            self.compileVarDec()
        
        # write the sub statements
        self.xml += '\n<statements>'      # open statements tag
        self.compileStatements()        # compile all statements
        self.xml += '\n</statements>'     # close statements tag

        self.writeAdv()                 # write '}' (closing the subroutine body)
        self.xml += '\n</subroutineBody>' # close subroutineBody tag
        self.xml += '\n</subroutineDec>'  # close subroutineDec tag

    def compileParameterList(self):
        # loop through until ')' without writing it  
        while not (self.jackFile.token == ')' and self.jackFile.tokenType == 'symbol'):
            self.writeAdv()
    
    def compileVarDec(self):
        self.xml += '\n<varDec>'     # open varDec tag 
        # loop through until ';' 
        while not (self.jackFile.token == ';' and self.jackFile.tokenType == 'symbol'):
            self.writeAdv()
        self.writeAdv()                 # write ';'
        self.xml += '\n</varDec>'    # close varDec tag

    def compileStatements(self):
        # look for statements
        while not (self.jackFile.token == '}' and self.jackFile.tokenType == 'symbol'):
            if self.jackFile.token == 'let': self.compileLet()
            elif self.jackFile.token == 'if': self.compileIf()
            elif self.jackFile.token == 'while': self.compileWhile()
            elif self.jackFile.token == 'do': self.compileDo()
            elif self.jackFile.token == 'return': self.compileReturn()
            else:
                self.writeAdv('expected statement')
                return
    
    def compileDo(self):
        self.xml += '\n<doStatement>'     # open doStatement tag 
        # loop through until ';' 
        while not (self.jackFile.token == ';' and self.jackFile.tokenType == 'symbol'):
            if self.jackFile.token == '(' and self.jackFile.tokenType == 'symbol':
                self.writeAdv()
                self.compileExpressionList()             
            else:
                self.writeAdv()
        self.writeAdv()                 # write ';'
        self.xml += '\n</doStatement>'    # close doStatement tag

    def compileLet(self):
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self.xml += '\n<letStatement>'     # open letStatement tag 

        self.writeAdv()     # write 'let'
        self.writeAdv()     # write varName
        if self.jackFile.token == '[':
            self.writeAdv()     # write '['
            self.compileExpression()
            self.writeAdv()     # write ']'
        self.writeAdv()     # write '='
        self.compileExpression()
        self.writeAdv()     #write ';'

        self.xml += '\n</letStatement>'    # close letStatement tag
    
    def compileWhile(self):
        # 'while' '(' expression ')' '{' statements '}' 
        self.xml += '\n<whileStatement>'
        self.writeAdv()     # write 'while'
        self.writeAdv()     # write '('
        self.compileExpression()
        self.writeAdv()     # write ')'
        self.writeAdv()     # write '{'
        self.xml += '\n<statements>'      # open statements tag
        self.compileStatements()
        self.xml += '\n</statements>'      # close statements tag
        self.writeAdv()     # write '}'
        self.xml += '\n</whileStatement>'
    
    def compileReturn(self):
        self.xml += '\n<returnStatement>'     
        self.writeAdv()                 # write 'return'
        # loop through until ';' 
        while not (self.jackFile.token == ';' and self.jackFile.tokenType == 'symbol'):
            self.compileExpression()
        self.writeAdv()                 # write ';'
        self.xml += '\n</returnStatement>'
    
    def compileIf(self):
        # 'if' '(' expression ')' '{' statements '}' 
        # ('else' '{' statements '}')?
        #TODO does not handle else statements
        self.xml += '\n<ifStatement>'
        self.writeAdv()     # write 'if'
        self.writeAdv()     # write '('
        self.compileExpression()
        self.writeAdv()     # write ')'
        self.writeAdv()     # write '{'
        self.xml += '\n<statements>'      # open statements tag
        self.compileStatements()
        self.xml += '\n</statements>'      # close statements tag
        self.writeAdv()     # write '}'
        if self.jackFile.token == 'else':
            self.writeAdv()     # write 'else'
            self.writeAdv()     # write '{'
            self.xml += '\n<statements>'      # open statements tag
            self.compileStatements()
            self.xml += '\n</statements>'      # close statements tag
            self.writeAdv()     # write '}' 
        self.xml += '\n</ifStatement>'


    def compileExpressionList(self):
        self.xml += '\n<expressionList>'
        # loop through until ')' without writing it  
        cont = True
        while cont:
            if self.jackFile.token == ')': 
                cont = False
            elif self.jackFile.token == ',':
                self.writeAdv()
            else:
                self.compileExpression()
        self.xml += '\n</expressionList>'

    def compileExpression(self):
        ###  term (op term)*
        # TODO can't handle unary operaters yet ('-' & '~')
        
        self.xml += '\n<expression>' 

        cont = True
        while cont:
            if self.isTerm() or self.isUnaryOp() or self.jackFile.token == '(':
                self.compileTerm()
                if self.isOp(): self.writeAdv()
            else:
                cont = False

        self.xml += '\n</expression>'
    
    def compileTerm(self):
        """
        Compiles a single term, using lookahead to tell the forms apart.

        Grammar:
            integerConstant | stringConstant | keywordConstant |
            varName | varName '[' expression ']' | subroutineCall |
            '(' expression ')' | unaryOp term

        The output is wrapped in a ``<term>`` element appended to
        ``self.xml``. The branch is chosen from the current token together
        with ``self.jackFile.peek()`` lookahead; the branches are tested in
        the order listed in the code below.
        """
        def write_tokens(count):
            # Emit `count` consecutive tokens exactly as they appear.
            for _ in range(count):
                self.writeAdv()

        self.xml += '\n<term>'
        tokens = self.jackFile

        if self.isUnaryOp():
            # unaryOp term
            write_tokens(1)
            self.compileTerm()
        elif self.isTerm() and tokens.peek() == '(':
            # name '(' expressionList ')'
            write_tokens(2)
            self.compileExpressionList()
            write_tokens(1)
        elif self.isTerm() and tokens.peek() == '.' and tokens.peek(3) == '(':
            # name '.' name '(' expressionList ')'
            write_tokens(4)
            self.compileExpressionList()
            write_tokens(1)
        elif self.isTerm() and tokens.peek() == '[':
            # name '[' expression ']'
            write_tokens(2)
            self.compileExpression()
            write_tokens(1)
        elif tokens.token == '(':
            # '(' expression ')'
            write_tokens(1)
            self.compileExpression()
            write_tokens(1)
        else:
            # any other single token is written as the whole term
            write_tokens(1)

        self.xml += '\n</term>'
    
    def writeAdv(self, *err):
        """
        Writes the current token (or an error) to the XML and advances.

        Without arguments, the current token is appended to ``self.xml`` as
        ``<tokenType> token </tokenType>``. The XML special characters '&',
        '<' and '>' are escaped wherever they occur in the token text, so a
        string constant such as ``a < b`` is escaped as well, not only a
        token that consists of the bare symbol. The tokenizer's own ``token``
        attribute is left untouched.

        :param err: optional; when given, ``err[0]`` is printed and appended
            to ``self.xml`` as an ``<error>`` element instead of the token.
        """
        if err:
            print(err[0])
            self.xml += '\n<error>' + err[0] + '</error>'
        else:
            tag = self.jackFile.tokenType
            # '&' must be replaced first so the '&' introduced by the other
            # two replacements is not escaped a second time.
            text = (self.jackFile.token
                    .replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;'))
            self.xml += '\n<' + tag + '> ' + text + ' </' + tag + '>'
        # the current token is consumed in both cases
        self.jackFile.advance()

    def isOp(self):
        """Returns True when the current token is a Jack binary operator."""
        current = self.jackFile.token
        return current in ('+', '-', '*', '/', '&', '|', '<', '>', '=')
        
    def isUnaryOp(self):
        """Returns True when the current token is a unary operator ('-' or '~')."""
        current = self.jackFile.token
        return current in ('-', '~')
    
    def isSymbol(self):
        """Returns True when the current token is one of the Jack symbols."""
        jack_symbols = (
            '{', '}', '(', ')', '[', ']',      # brackets
            '.', ',', ';',                     # punctuation
            '+', '-', '*', '/', '&', '|',      # arithmetic / logic
            '<', '>', '=', '~',                # comparison / negation
        )
        return self.jackFile.token in jack_symbols

    def isTermEnd(self):
        """
        Returns True when the current token ends a term.

        A term is considered finished by ' ', ')', ']', ';' or any binary
        operator. The operator list needs a comma between '&' and '|':
        adjacent string literals are concatenated by Python, which would
        turn the two entries into the single entry '&|' and leave both
        operators unrecognised.
        """
        operators = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        termFinishers = [' ', ')', ']', ';'] + operators
        return self.jackFile.token in termFinishers
    
    def isTerm(self):
        """
        Returns True when the current token's type can stand as a term.

        The accepted token types are keyword, identifier, integerConstant
        and stringConstant.
        """
        kind = self.jackFile.tokenType
        return kind in ('keyword', 'identifier', 'integerConstant', 'stringConstant')