# Example no. 1
0
class CompilationEngine:
	"""
	generates the compilers output
	"""

	def __init__(self, input_file, output_file):
		"""
		Set up the tokenizer, symbol table and VM writer, then compile the class.

		Compilation runs as part of construction. The VM writer is closed once
		compilation ends, including when it ends with an exception.
		:param input_file: the jack file that the user wants to compile
		:param output_file: the path handed to VMWriter for the generated output
		"""
		self.label_count = 0
		self.file_reader = JackFileReader(input_file)
		self.jack_tokens = JackTokenizer(self.file_reader.get_one_liner())
		self.curr_token = self.jack_tokens.advance()
		self.to_output_file = []
		self.symbol_table = SymbolTable()
		self.vm_writer = VMWriter(output_file)
		self.class_name = None
		try:
			self.compile_class()
		finally:
			# release the writer even if compilation fails part-way
			self.vm_writer.close()

	def compile_class(self):
		"""
		Compiles a complete class: 'class' className '{' classVarDec* subroutineDec* '}'
		"""
		# skip the 'class' keyword
		self.next_token()
		# the token that follows is the class name
		self.class_name = self.next_token()
		# skip '{'
		self.next_token()
		# first every class variable declaration, then every subroutine
		sections = (
			(VAR_DECS, self.compile_class_var_dec),
			(SUB_ROUTINES, self.compile_subroutine_dec),
		)
		for openers, compile_section in sections:
			while self.curr_token.split()[1] in openers:
				compile_section()
		# skip '}'
		self.next_token()

	def compile_class_var_dec(self):
		"""
		Compiles a static declaration or a field declaration:
		('static' | 'field') type varName (',' varName)* ';'

		Every declared name is registered in the symbol table.
		:return: None
		"""
		var_kind = self.next_token()
		var_type = self.next_token()
		while True:
			self.symbol_table.define(self.next_token(), var_type, var_kind)
			# the token consumed here is the ',' between names or the closing ';'
			if self.next_token() != COMMA:
				break

	def compile_subroutine_dec(self):
		"""
		Compiles a complete method, function, or constructor.

		Writes the VM function header, the prologue that sets up 'this' for
		constructors and methods, and then the subroutine body.
		:return: None
		"""
		self.symbol_table.start_subroutine()
		# 'constructor' | 'function' | 'method'
		flavour = self.next_token()
		# the return type is consumed but not used
		self.next_token()
		full_name = self.class_name + "." + self.next_token()
		# consume '('
		self.next_token()
		is_method = flavour == "method"
		if is_method:
			# register 'this' as an argument before the declared parameters
			self.symbol_table.define("this", self.class_name, ARG)
		self.compile_parameters_list()
		self.vm_writer.write_function(full_name, self.count_var_dec())

		if flavour == "constructor":
			# allocate one word per field and anchor 'this' on the new block
			self.vm_writer.write_push("constant", self.get_num_of_field_vars())
			self.vm_writer.write_call("Memory.alloc", 1)
			self.vm_writer.write_pop("pointer", 0)
		elif is_method:
			# argument 0 carries the object the method was called on
			self.vm_writer.write_push("argument", 0)
			self.vm_writer.write_pop("pointer", 0)

		# consume ')'
		self.next_token()
		self.compile_subroutine_body()

	def get_num_of_field_vars(self):
		"""
		Counts the class-level symbol table entries whose second element is "field".
		:return: the number of such entries
		"""
		class_entries = self.symbol_table.class_symbol_table.values()
		return sum(1 for entry in class_entries if entry[1] == "field")

	def compile_parameters_list(self):
		"""
		Compiles a (possibly empty) parameter list, not including the enclosing ().

		Each parameter is registered in the symbol table as an argument.
		:return: the number of parameters found
		"""
		if self.curr_token.split()[1] == RIGHT_BRACKETS:
			# empty list: leave the ')' for the caller
			return 0
		count = 0
		while True:
			par_type = self.next_token()
			par_name = self.next_token()
			self.symbol_table.define(par_name, par_type, ARG)
			count += 1
			if self.curr_token.split()[1] != COMMA:
				return count
			# step over the ',' that separates two parameters
			self.next_token()

	def count_var_dec(self):
		"""
		Counts the local variables of the subroutine by scanning ahead in the
		token list, without moving the tokenizer itself.

		The scan starts one token after the tokenizer's current index.
		:return: the number of variables declared by consecutive 'var' statements
		"""
		cursor = self.jack_tokens.curr_token + 1
		tokens = self.jack_tokens.list_of_tokens
		total = 0
		# 'var' type varName (',' varName)* ';'
		while tokens[cursor].split()[1] == "var":
			# 'var', the type and the first name: three tokens, one variable
			total += 1
			cursor += 3
			while tokens[cursor].split()[1] == COMMA:
				# ',' plus a name: two tokens, one more variable
				total += 1
				cursor += 2
			# step over ';'
			cursor += 1
		return total

	def compile_subroutine_body(self):
		"""
		Compiles the subroutine body: '{' varDec* statements '}'
		:return: None
		"""
		# consume '{'
		self.next_token()
		while True:
			if self.curr_token.split()[1] != "var":
				break
			self.compile_var_dec()
		self.compile_statements()
		# consume '}'
		self.next_token()

	def compile_var_dec(self):
		"""
		Compiles a var declaration: 'var' type varName (',' varName)* ';'

		Every declared name is registered in the symbol table as a local.
		:return: None
		"""
		# consume 'var'
		self.next_token()
		var_type = self.next_token()
		while True:
			self.symbol_table.define(self.next_token(), var_type, LCL)
			# the token consumed here is the ',' between names or the closing ';'
			if self.next_token() != COMMA:
				break

	def compile_statements(self):
		"""
		Compiles a sequence of statements, stopping at the first token that
		does not open a let / if / while / do / return statement.
		:return: None
		"""
		handlers = {
			"let": "compile_let",
			"if": "compile_if",
			"while": "compile_while",
			"do": "compile_do",
			"return": "compile_return",
		}
		while True:
			handler_name = handlers.get(self.curr_token.split()[1])
			if handler_name is None:
				return
			getattr(self, handler_name)()

	def compile_let(self):
		"""
		Compiles a let statement:
		'let' varName ('[' expression ']')? '=' expression ';'
		:return: None
		"""
		# consume 'let'
		self.next_token()
		target = self.next_token()
		segment = self.symbol_table.kind_of(target)
		if segment == "field":
			# fields are addressed through the 'this' segment
			segment = "this"
		slot = self.symbol_table.index_of(target)

		is_array_entry = self.curr_token.split()[1] == LEFT_SQUARE_BRACKETS
		if is_array_entry:
			# consume '[', then leave (index + base address) on the stack
			self.next_token()
			self.compile_expression()
			self.vm_writer.write_push(segment, slot)
			self.vm_writer.write_arithmetic("+")
			# consume ']'
			self.next_token()

		# consume '='
		self.next_token()
		self.compile_expression()
		if not is_array_entry:
			self.vm_writer.write_pop(segment, slot)
		else:
			# park the value in temp 0, aim 'that' at the entry, then store it
			self.vm_writer.write_pop("temp", 0)
			self.vm_writer.write_pop("pointer", 1)
			self.vm_writer.write_push("temp", 0)
			self.vm_writer.write_pop("that", 0)

		# consume ';'
		self.next_token()

	def compile_if(self):
		"""
		Compiles an if statement with an optional else clause.

		The condition jumps to the "true" label; otherwise control falls
		through to a goto that skips the true branch.
		:return: None
		"""
		# consume 'if' and '('
		self.next_token()
		self.next_token()
		self.compile_expression()
		true_label = self.next_label()
		self.vm_writer.write_if(true_label)
		false_label = self.next_label()
		self.vm_writer.write_goto(false_label)
		self.vm_writer.write_label(true_label)
		# consume ')' and '{'
		self.next_token()
		self.next_token()
		self.compile_statements()
		# consume '}'
		self.next_token()

		if self.curr_token.split()[1] != "else":
			# no else clause: the false label simply ends the statement
			self.vm_writer.write_label(false_label)
			return

		end_label = self.next_label()
		# the true branch must jump over the else branch
		self.vm_writer.write_goto(end_label)
		self.vm_writer.write_label(false_label)
		# consume 'else' and '{'
		self.next_token()
		self.next_token()
		self.compile_statements()
		# consume '}'
		self.next_token()
		self.vm_writer.write_label(end_label)

	def compile_while(self):
		"""
		Compiles a while statement: 'while' '(' expression ')' '{' statements '}'
		:return: None
		"""
		# consume 'while' and '('
		self.next_token()
		self.next_token()
		loop_start = self.next_label()
		self.vm_writer.write_label(loop_start)
		self.compile_expression()
		# leave the loop when the negated condition holds
		self.vm_writer.write_arithmetic(NOT)
		loop_end = self.next_label()
		self.vm_writer.write_if(loop_end)
		# consume ')' and '{'
		self.next_token()
		self.next_token()
		self.compile_statements()
		self.vm_writer.write_goto(loop_start)
		self.vm_writer.write_label(loop_end)
		# consume '}'
		self.next_token()

	def compile_subroutine_call(self):
		"""
		Compiles a subroutine call and writes the matching VM 'call'.

		Two shapes are handled:
		  * name '(' expressionList ')' -- a call on the current object;
		    'pointer 0' is pushed as the implicit first argument.
		  * prefix '.' name '(' expressionList ')' -- when prefix is a known
		    variable it is pushed as the implicit first argument and the call
		    targets the variable's type; otherwise prefix is used as a class name.

		The symbol table is consulted only for the prefix of a dotted call, so
		an unqualified call whose name matches a variable no longer pushes
		that variable as an extra, uncounted argument.
		:return: None
		"""
		first_name = self.next_token()
		num_of_arguments = 0

		if self.curr_token.split()[1] == ".":
			# advance the dot
			self.next_token()
			kind = self.symbol_table.kind_of(first_name)
			index = self.symbol_table.index_of(first_name)
			type_ = self.symbol_table.type_of(first_name)
			member_name = self.next_token()
			if type_:
				subroutine_name = type_ + "." + member_name
			else:
				subroutine_name = first_name + "." + member_name
			# advance the opening bracket
			self.next_token()
			if kind is not None and index is not None:
				if kind == "field":
					# fields are addressed through the 'this' segment
					kind = "this"
				self.vm_writer.write_push(kind, index)
				num_of_arguments = 1
		else:
			subroutine_name = self.class_name + "." + first_name
			# advance the opening bracket
			self.next_token()
			# the current object is the implicit first argument
			self.vm_writer.write_push("pointer", 0)
			num_of_arguments = 1

		num_of_arguments += self.compile_expression_list()
		# advance the closing bracket
		self.next_token()

		self.vm_writer.write_call(subroutine_name, num_of_arguments)

	def compile_do(self):
		"""
		Compiles a do statement: 'do' subroutineCall ';'

		The value left on the stack by the call is popped into temp 0.
		:return: None
		"""
		self.next_token()  # 'do'
		self.compile_subroutine_call()
		# a do statement has no use for the returned value
		self.vm_writer.write_pop("temp", 0)
		self.next_token()  # ';'

	def compile_return(self):
		"""
		Compiles a return statement: 'return' expression? ';'

		A bare 'return;' pushes constant 0 so that a value is always returned.
		:return: None
		"""
		# consume 'return'
		self.next_token()
		upcoming = self.curr_token.split()[1]
		if upcoming == SEMI_COLON:
			# nothing to return: push the default value
			self.vm_writer.write_push("constant", 0)
		elif upcoming == "this":
			self.vm_writer.write_push("pointer", 0)
			self.next_token()
		else:
			self.compile_expression()
		self.vm_writer.write_return()
		# consume ';'
		self.next_token()

	def compile_expression(self):
		"""
		Compiles an expression: term (op term)*

		Operators are applied strictly left to right, each one right after
		its right-hand term has been compiled.
		:return: None
		"""
		self.compile_term()
		while True:
			op = self.curr_token.split()[1]
			if op not in Syntax.operators:
				return
			# consume the operator, compile its right operand, then apply it
			self.next_token()
			self.compile_term()
			self.compile_op(op)

	def compile_op(self, op):
		"""
		Writes the VM code for a binary operator.

		'*' and '/' become calls to Math.multiply / Math.divide; every other
		operator is passed to the VM writer's arithmetic command.
		:param op: the operator symbol
		"""
		math_routines = {"*": "Math.multiply", "/": "Math.divide"}
		routine = math_routines.get(op)
		if routine is None:
			self.vm_writer.write_arithmetic(op)
		else:
			self.vm_writer.write_call(routine, 2)

	def compile_term(self):
		"""
		Compiles a term. This routine is faced with a slight difficulty when trying to decide between
		some of the alternative parsing rules. Specifically, if the current token is an
		identifier, the routine must distinguish between a variable, an array entry, and a
		subroutine call. A single look-ahead token, which may be one of [, (, or .  suffices to distinguish
		between the three possibilities. Any other token is not part of this term and should not be advanced over.

		Constants are recognised by their tag before any value-based test, so a
		string constant whose first word is "(", "-" or "~" is still compiled as
		a string rather than as a parenthesised expression or a unary operator.
		:return: None
		"""
		all_ = self.curr_token.split()
		header = all_[0]
		val = all_[1]
		if header == "<integerConstant>":
			self.vm_writer.write_push("constant", val)
			self.next_token()

		elif header == "<stringConstant>":
			# the slice removes the opening tag plus a space and the space plus closing tag
			the_string = self.curr_token[17:-18]
			self.vm_writer.write_push("constant", len(the_string))
			self.vm_writer.write_call("String.new", 1)
			for char in the_string:
				self.vm_writer.write_push("constant", ord(char))
				self.vm_writer.write_call("String.appendChar", 2)
			self.next_token()

		# handle in case of (expression)
		elif val == LEFT_BRACKETS:
			# advance passed "("
			self.next_token()
			self.compile_expression()
			# advance passed ")"
			self.next_token()

		# case of unary op
		elif val in ONARY_OP:
			self.next_token()
			self.compile_term()
			if val == "-":
				self.vm_writer.write_arithmetic(NEG)
			else:
				self.vm_writer.write_arithmetic(NOT)

		elif header == IDENTIFIER:
			next_token = self.jack_tokens.peek().split()[1]
			if next_token == LEFT_SQUARE_BRACKETS:
				# array entry: skip the name and "["
				self.next_token()
				self.next_token()
				self.compile_expression()
				kind = self.symbol_table.kind_of(val)
				if kind == "field":
					kind = "this"
				# index + base address, then read through 'that'
				self.vm_writer.write_push(kind, self.symbol_table.index_of(val))
				self.vm_writer.write_arithmetic("+")
				# skip over "]"
				self.next_token()
				self.vm_writer.write_pop("pointer", 1)
				self.vm_writer.write_push("that", 0)

			# subroutine call: subroutineName(expressionList) or prefix.name(expressionList)
			elif next_token == LEFT_BRACKETS or next_token == ".":
				self.compile_subroutine_call()

			else:
				# plain variable
				kind = self.symbol_table.kind_of(val)
				if kind == "field":
					kind = "this"
				self.vm_writer.write_push(kind, self.symbol_table.index_of(val))
				self.next_token()

		elif header == "<keyword>":
			if val == "this":
				self.vm_writer.write_push("pointer", 0)
			else:
				# every other keyword pushes 0; 'true' is that 0 with NOT applied
				self.vm_writer.write_push("constant", 0)
				if val == "true":
					self.vm_writer.write_arithmetic(NOT)
			self.next_token()

		return

	def compile_expression_list(self):
		"""
		Compiles a (possibly empty) comma separated list of expressions.
		:return: the number of expressions compiled
		"""
		if self.curr_token.split()[1] == RIGHT_BRACKETS:
			# empty list: leave the ')' for the caller
			return 0
		self.compile_expression()
		count = 1
		while self.curr_token.split()[1] == COMMA:
			# consume ',' before the next expression
			self.next_token()
			self.compile_expression()
			count += 1
		return count

	def next_token(self):
		"""
		Advances the tokenizer by one token.
		:return: the second whitespace-separated word of the token that was
			current before advancing
		"""
		consumed, self.curr_token = self.curr_token.split()[1], self.jack_tokens.advance()
		return consumed

	def next_label(self):
		"""
		Produces a fresh label name and bumps the label counter.
		:return: "LABEL" followed by the counter value before the increment
		"""
		label = "LABEL{}".format(self.label_count)
		self.label_count += 1
		return label
class CompilationEngine(object):
    def __init__(self, inFile):
        """Create the tokenizer, symbol table and VM writer for one Jack file.

        The VM output path is the input path with a trailing '.jack' replaced
        by '.vm'; a path without that suffix just gets '.vm' appended.

        :param inFile: path of the Jack source file to compile
        """
        self.t = Tokenizer(inFile)
        self.symTable = SymbolTable()
        # str.rstrip('.jack') strips any trailing run of the characters
        # '.', 'j', 'a', 'c', 'k' (e.g. 'Back.jack' -> 'B'), so the suffix
        # is removed explicitly instead.
        suffix = '.jack'
        if inFile.endswith(suffix):
            base = inFile[:-len(suffix)]
        else:
            base = inFile
        self.vmName = base + '.vm'
        self.vm = VMWriter(self.vmName)
        self.className = ''
        self.types = ['int', 'char', 'boolean', 'void']
        self.stmnt = ['do', 'let', 'if', 'while', 'return']
        self.subroutType = ''
        # per-subroutine counters used to build unique while/if labels
        self.whileIndex = 0
        self.ifIndex = 0
        # number of fields of the class, set by compile_class
        self.fieldNum = 0

    def compile_class(self):
        """Compile a whole class and close the VM writer afterwards.

        Records the class name and the number of fields, then compiles
        subroutines until the closing '}' is reached.
        """
        tokenizer = self.t
        tokenizer.advance()
        self.validator('class')
        self.className = tokenizer.current_token()
        tokenizer.advance()
        self.validator('{')
        self.fieldNum = self.compile_class_var_dec()
        # everything up to the closing brace is a subroutine
        while not tokenizer.symbol() == '}':
            self.compile_subroutine()
        self.validator('}')
        self.vm.close()

    def compile_class_var_dec(self):
        """Compile all consecutive 'field' / 'static' declarations.

        Every declared name is registered in the symbol table.

        :return: how many of the declared variables are fields
        """
        declaration_kinds = ['field', 'static']
        field_total = 0
        while self.t.keyword() in declaration_kinds:
            kind = self.t.current_token()
            self.validator(declaration_kinds)
            varType = self.t.current_token()
            self.validator(['int', 'char', 'boolean', 'IDENTIFIER'])
            # one or more names separated by ',' and closed by ';'
            while True:
                self.symTable.define(self.t.current_token(), varType, kind)
                self.t.advance()
                if kind == 'field':
                    field_total += 1
                if self.t.symbol() == ';':
                    break
                self.validator(',')
            self.validator(';')

        return field_total

    def compile_subroutine(self):
        """Compile one constructor, function or method declaration.

        Consumes the header, the parameter list and the body, writes the VM
        function line plus the 'this' set-up for constructors and methods,
        and resets the while/if label counters afterwards.
        """
        # keep a reference to the subroutine table that was active before
        # this subroutine; it is put back at the end of the method
        current_subrout_scope = self.symTable.subDict
        self.symTable.start_subroutine()

        subroutKword = self.t.current_token()
        self.validator(['constructor', 'function', 'method'])

        # return type of the subroutine
        self.subroutType = self.t.current_token()
        self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])

        name = self.t.current_token()
        subroutName = self.className + '.' + name
        self.t.advance()
        self.validator('(')

        # NOTE(review): method=True presumably reserves argument 0 for 'this'
        # inside SymbolTable.define -- confirm against SymbolTable
        if subroutKword == 'method':
            self.compile_parameter_list(method=True)
        else:
            self.compile_parameter_list()

        self.validator(')')
        self.validator('{')

        # empty body: nothing is written to the VM file, and the saved scope
        # and the label counters are left untouched on this path
        if self.t.symbol() == '}':
            self.t.advance()

            return

        # the body must open with a declaration or a statement; this check
        # does not consume the token
        self.validator(['var', 'let', 'do', 'if', 'while', 'return'],
                       advance=False)
        numLocals = 0
        if self.t.keyword() == 'var':
            numLocals = self.compile_var_dec()

        self.vm.write_function(subroutName, numLocals)

        if subroutKword == 'constructor':
            # allocate one word per field and point 'this' at the new block
            self.vm.write_push('constant', self.fieldNum)
            self.vm.write_call('Memory.alloc', 1)
            self.vm.write_pop('pointer', 0)
        elif subroutKword == 'method':
            # argument 0 holds the object the method was called on
            self.vm.write_push('argument', 0)
            self.vm.write_pop('pointer', 0)

        if self.t.keyword() in self.stmnt:
            self.compile_statements()

        self.validator('}')
        # put back the table that was active before this subroutine
        self.symTable.subDict = current_subrout_scope
        # label numbering starts over for the next subroutine
        self.whileIndex = 0
        self.ifIndex = 0

        return

    def compile_parameter_list(self, method=False):
        """Compile a (possibly empty) parameter list, excluding the parentheses.

        Each parameter is registered in the symbol table with kind 'arg' and
        with its own declared type.

        :param method: when true, the first parameter is defined with
            ``method=True`` (the effect of that flag lives in SymbolTable and
            is not visible here)
        :return: the number of parameters compiled
        """
        param_types = ['int', 'char', 'boolean', 'void', 'IDENTIFIER']
        kind = 'arg'
        counter = 0

        if self.t.symbol() == ')':
            return counter
        varType = self.t.current_token()
        self.validator(param_types)
        name = self.t.current_token()
        if method:
            self.symTable.define(name, varType, kind, method=True)
        else:
            self.symTable.define(name, varType, kind)

        self.t.advance()
        counter += 1
        while self.t.symbol() == ',':
            self.validator(',')
            # read the type of this parameter before the validator moves past
            # it; otherwise every later parameter inherits the first type
            varType = self.t.current_token()
            self.validator(param_types)
            name = self.t.current_token()
            self.symTable.define(name, varType, kind)
            self.t.advance()
            counter += 1

        return counter

    def compile_var_dec(self):
        """Compile all consecutive 'var' declarations of a subroutine.

        Every declared name is registered in the symbol table with kind 'var'.

        :return: the total number of variables declared
        """
        kind = 'var'
        declared = 0

        while self.t.keyword() == 'var':
            self.t.advance()  # var
            varType = self.t.current_token()
            self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])
            # one or more names separated by ','
            while True:
                self.symTable.define(self.t.current_token(), varType, kind)
                self.t.advance()
                declared += 1
                if self.t.symbol() != ',':
                    break
                self.t.advance()  # ,
            self.validator(';')

        return declared

    def compile_statements(self):
        """Compile statements for as long as the current keyword is listed in self.stmnt.

        :raises Exception: if a keyword listed in self.stmnt has no compile routine
        """
        routines = {
            'let': 'compile_let',
            'do': 'compile_do',
            'if': 'compile_if',
            'while': 'compile_while',
            'return': 'compile_return',
        }
        while self.t.keyword() in self.stmnt:
            routine = routines.get(self.t.keyword())
            if routine is None:
                raise Exception(self.t.current_token() + ' is not valid')
            getattr(self, routine)()

    def compile_do(self):
        """Compile a do statement and discard the value the call returns.

        ``do name(...)`` is a call on the current object, so 'pointer 0' is
        pushed first. ``do prefix.name(...)`` pushes the variable when the
        prefix is a known variable and calls through its type; otherwise the
        prefix is used as a class name and nothing extra is pushed.
        """
        self.t.advance()  # do

        if self.t.tokens[self.t.tokenIndex + 1] == '(':  # subroutineName(exprlist)
            target = self.className + '.' + self.t.current_token()
            self.t.advance()
            self.validator('(')
            self.vm.write_push('pointer', 0)
            implicit_args = 1  # 'this'
        else:  # name.subroutine(exprList)
            receiver = self.t.current_token()
            self.t.advance()
            self.validator('.')
            member = self.t.current_token()
            self.t.advance()
            self.validator('(')

            # 'this' is the kind used for fields
            if self.symTable.kind_of(receiver) in [
                    'this', 'static', 'local', 'argument'
            ]:
                target = self.symTable.type_of(receiver) + '.' + member
                self.vm.write_push(self.symTable.kind_of(receiver),
                                   self.symTable.index_of(receiver))
                implicit_args = 1  # the receiving object
            else:
                target = receiver + '.' + member
                implicit_args = 0

        numArgs = self.compile_expression_list()
        self.vm.write_call(target, numArgs + implicit_args)

        self.validator(')')
        self.validator(';')
        self.vm.write_pop('temp', 0)  # throws away returned value

        return

    def compile_let(self):
        """Compile a let statement, with or without an array index on the target.

        :raises Exception: if the target name is in neither the class table
            nor the subroutine table
        """
        is_array = False
        self.t.advance()  # let
        while self.t.symbol() != ';':
            name = self.t.identifier()
            kind = self.symTable.kind_of(name)
            index = self.symTable.index_of(name)
            declared = (name in self.symTable.classDict
                        or name in self.symTable.subDict)
            if not declared:
                raise Exception(self.t.identifier() + ' is not defined')
            self.t.advance()

            if self.t.symbol() == '[':  # array index
                is_array = True
                # base address + index is left on the stack
                self.vm.write_push(kind, index)
                self.validator('[')
                self.compile_expression()
                self.validator(']')
                self.vm.write_arithmetic('+')

            self.validator('=')
            self.compile_expression()
            if not is_array:
                self.vm.write_pop(kind, index)
            else:
                # park the value, aim 'that' at the entry, then store
                self.vm.write_pop('temp', 0)
                self.vm.write_pop('pointer', 1)
                self.vm.write_push('temp', 0)
                self.vm.write_pop('that', 0)
        self.validator(';')

        return

    def compile_while(self):
        """Compile a while loop using the labels WHILE<n> and ENDWHILE<n>.

        <n> is the value of self.whileIndex on entry; the counter is then
        incremented so that nested loops get their own labels.
        """
        loop_label = 'WHILE' + str(self.whileIndex)
        exit_label = 'END' + loop_label
        self.whileIndex += 1
        self.vm.write_label(loop_label)
        self.t.advance()  # while
        self.validator('(')

        self.compile_expression()
        # leave the loop when the negated condition is true
        self.vm.write_arithmetic('~')
        self.vm.write_if(exit_label)

        self.validator(')')
        self.validator('{')

        self.compile_statements()
        self.vm.write_goto(loop_label)

        self.validator('}')
        self.vm.write_label(exit_label)

        return

    def compile_return(self):
        """Compile a return statement; a bare 'return;' returns constant 0."""
        self.t.advance()  # return
        if self.t.symbol() != ';':
            self.compile_expression()
            self.validator(';')
            self.vm.write_return()
            return

        # no expression: push a dummy value so the caller always gets one
        self.vm.write_push('constant', '0')
        self.vm.write_return()
        self.t.advance()  # ;

    def compile_if(self):
        """Compile an if statement with an optional else clause.

        Uses the labels IF_ELSE<n> and END_IF<n>, where <n> is the value of
        self.ifIndex on entry; the counter is incremented before the body is
        compiled so nested ifs get their own labels.
        """
        suffix = str(self.ifIndex)
        else_label = 'IF_ELSE' + suffix
        end_label = 'END_IF' + suffix
        self.ifIndex += 1

        self.t.advance()  # if
        self.validator('(')
        self.compile_expression()
        # jump to the else part when the negated condition is true
        self.vm.write_arithmetic('~')
        self.vm.write_if(else_label)

        self.validator(')')
        self.validator('{')

        self.compile_statements()
        self.vm.write_goto(end_label)
        self.validator('}')
        self.vm.write_label(else_label)

        has_else = self.t.keyword() == 'else'
        if has_else:
            self.t.advance()  # else
            self.validator('{')

            self.compile_statements()

            self.validator('}')
        self.vm.write_label(end_label)

        return

    def compile_expression(self):
        """Compile ``term (op term)*``, applying each operator left to right.

        Every operator is handed to the VM writer right after its right-hand
        term has been compiled.
        """
        binary_ops = ('+', '-', '*', '/', '&', '|', '<', '>', '=')
        self.compile_term()
        while True:
            if self.t.symbol() not in binary_ops:
                break
            pending_op = self.t.current_token()
            self.t.advance()
            self.compile_term()
            self.vm.write_arithmetic(pending_op)

        return

    def compile_term(self):
        """Compile one term and emit the VM code that leaves its value on the stack.

        Handles integer and string constants, keyword constants, unary
        operators, parenthesised expressions and, for identifiers, the four
        shapes told apart by a one-token look-ahead: array entry ``a[i]``,
        call on the current object ``f(...)``, qualified call ``x.f(...)``
        and plain variable.

        For ``f(...)`` the current object ('pointer 0') is pushed before the
        arguments, matching the ``numArgs + 1`` passed to the call and the
        way compile_do handles the same form.

        :raises Exception: on a keyword, symbol or token type that cannot
            start a term, or on an array name that is not declared
        """
        keyConst = ['true', 'false', 'null', 'this']
        unOps = ['-', '~']

        if self.t.token_type() == 'INT_CONST':
            self.vm.write_push('constant', self.t.int_val())
            self.t.advance()
        elif self.t.token_type() == 'STRING_CONST':
            string = self.t.string_val()
            length = len(string)
            # build the string object, then append it one character at a time
            self.vm.write_push('constant', length)
            self.vm.write_call('String.new', 1)
            for char in string:
                char = ord(char)  # character code
                self.vm.write_push('constant', char)
                self.vm.write_call('String.appendChar', 2)
            self.t.advance()

        elif self.t.token_type() == 'KEYWORD':
            # only keyword constants may appear here; the check does not advance
            self.validator(keyConst, advance=False)
            if self.t.current_token() in ['false', 'null']:
                self.t.advance()
                self.vm.write_push('constant', '0')
            elif self.t.current_token() == 'true':
                # NOTE(review): presumably emits the negation of 1, i.e. -1;
                # the meaning of neg=True lives in VMWriter -- confirm
                self.vm.write_push('constant', '1')
                self.vm.write_arithmetic('-', neg=True)
                self.t.advance()
            else:
                # 'this'
                self.vm.write_push('pointer', '0')
                self.t.advance()

        elif self.t.token_type() == 'SYMBOL':
            if self.t.symbol() in unOps:  # unary operator
                unOpToken = self.t.current_token()
                self.t.advance()
                self.compile_term()
                self.vm.write_arithmetic(unOpToken, neg=True)
            elif self.t.symbol() == '(':  # (expression)
                self.t.advance()
                self.compile_expression()
                self.t.advance()
            else:
                raise Exception(self.t.current_token() + ' is not valid')
        elif self.t.token_type() == 'IDENTIFIER':  # varName, array, or subcall
            lookAhead = self.t.tokens[self.t.tokenIndex + 1]
            if lookAhead == '[':  # array item
                name = self.t.identifier()
                kind = self.symTable.kind_of(name)
                index = self.symTable.index_of(name)
                if name in self.symTable.classDict:
                    self.t.advance()
                elif name in self.symTable.subDict:
                    self.t.advance()
                else:
                    raise Exception(self.t.identifier() + ' is not defined')
                self.vm.write_push(kind, index)
                self.validator('[')
                self.compile_expression()

                # base + index, then read the entry through 'that'
                self.vm.write_arithmetic('+')
                self.vm.write_pop('pointer', 1)
                self.vm.write_push('that', 0)

                self.validator(']')

            elif lookAhead == '(':  # subcall
                current_subrout_scope = self.symTable.subDict
                name = self.className + '.' + self.t.current_token()
                self.t.advance()
                self.validator('(')
                # the current object is the implicit first argument
                self.vm.write_push('pointer', 0)
                numArgs = self.compile_expression_list()
                self.vm.write_call(name, numArgs + 1)
                self.validator(')')
                self.symTable.subDict = current_subrout_scope

            elif lookAhead == '.':  # name.subroutName(expressList)
                current_subrout_scope = self.symTable.subDict
                className = self.t.current_token()
                self.t.advance()
                self.validator('.')
                subroutName = self.t.current_token()
                self.validator('IDENTIFIER')
                name = className + '.' + subroutName
                self.validator('(')
                if self.symTable.kind_of(className) in [
                        'this', 'static', 'local', 'argument'
                ]:
                    # used 'this' for 'field'
                    classType = self.symTable.type_of(className)
                    name = classType + '.' + subroutName
                    kind = self.symTable.kind_of(className)
                    index = self.symTable.index_of(className)
                    # the variable is the object the method is called on
                    self.vm.write_push(kind, index)
                    numArgs = self.compile_expression_list()
                    self.vm.write_call(name, numArgs + 1)
                else:
                    numArgs = self.compile_expression_list()
                    self.vm.write_call(name, numArgs)
                self.validator(')')
                self.symTable.subDict = current_subrout_scope
            else:
                name = self.t.identifier()  # varName
                kind = self.symTable.kind_of(name)
                index = self.symTable.index_of(name)
                self.vm.write_push(kind, index)
                self.t.advance()
        else:
            raise Exception(self.t.current_token() + ' is not valid')

        return

    def compile_expression_list(self):  # only in subroutineCall
        """Compile a possibly empty, comma separated list of expressions.

        :return: the number of expressions compiled
        """
        if self.t.symbol() == ')':
            # empty argument list
            return 0

        self.compile_expression()
        compiled = 1
        while self.t.symbol() == ',':
            self.t.advance()  # ,
            self.compile_expression()
            compiled += 1
        return compiled

    def validator(self, syntax, advance=True):
        """Check the current token against the accepted values and optionally consume it.

        A token is accepted when either its text or its token type equals one
        of the entries of ``syntax``.

        :param syntax: one accepted value, or a list of accepted values
        :param advance: when true, the tokenizer moves to the next token
            before the check is evaluated, so it has advanced even when the
            check fails
        :return: True when the token is accepted
        :raises Exception: naming the rejected token when nothing matches
        """
        tokenType = self.t.token_type()
        token = self.t.current_token()
        if advance:
            self.t.advance()
        if not isinstance(syntax, list):
            syntax = [syntax]
        for item in syntax:
            if item in [tokenType, token]:
                return True
        # report the token that was checked, not the one the tokenizer has
        # already moved on to
        raise Exception(token + ' is not valid')
# Example no. 3
0
class CompilationEngine():
    """
    Compiles a Jack source file, read from a Jack tokenizer, into output_file.

    NOTE(review): the original description mentions XML output, but the
    methods of this class emit their output through a VMWriter.
    """
    # symbol kind recorded for names declared in each construct
    SYMBOL_KINDS = {
        'parameter_list': 'argument',
        'var_dec': 'local'
    }
    # statement keywords the LabelCounter is created with
    TOKENS_THAT_NEED_LABELS = ['if', 'while']
    # token texts that end each construct (read by _not_terminal_token_for)
    TERMINATING_TOKENS = {
        'class': ['}'],
        'class_var_dec': [';'],
        'subroutine': ['}'],
        'parameter_list': [')'],
        'expression_list': [')'],
        'statements': ['}'],
        'do': [';'],
        'let': [';'],
        'while': ['}'],
        'if': ['}'],
        'var_dec': [';'],
        'return': [';'],
        'expression': [';', ')', ']', ','],
        'array': [']']
    }
    # token texts that open each construct (read by _starting_token_for)
    STARTING_TOKENS = {
        'var_dec': ['var'],
        'parameter_list': ['('],
        'subroutine_body': ['{'],
        'expression_list': ['('],
        'expression': ['=', '[', '('],
        'array': ['['],
        'conditional': ['if', 'else']
    }

    def __init__(self, tokenizer, output_file):
        """
        Store the collaborators and create empty compilation state.

        :param tokenizer: token source used by every compile_* method.
        :param output_file: destination handed to the VMWriter.
        """
        self.tokenizer, self.output_file = tokenizer, output_file
        # class scope and subroutine scope live in separate tables
        self.class_symbol_table = SymbolTable()
        self.subroutine_symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)
        self.label_counter = LabelCounter(
            labels=self.TOKENS_THAT_NEED_LABELS
        )
        # filled in by compile_class
        self.class_name = None

    def compile_class(self):
        """
        lo basico pa compilar la clase
        """
        # omitimos todo para comenzar la clase
        while not self.tokenizer.class_token_reached():
            self.tokenizer.advance()
        # variable de instancia
        self.class_name = self.tokenizer.next_token.text

        while self.tokenizer.has_more_tokens:
            self.tokenizer.advance()

            if self.tokenizer.current_token.starts_class_var_dec():
                self.compile_class_var_dec()
            elif self.tokenizer.current_token.starts_subroutine():
                self.compile_subroutine()

    def compile_class_var_dec(self):
        symbol_kind = self.tokenizer.keyword()

        # obtenemos el tipo del simbolo
        self.tokenizer.advance()
        symbol_type = self.tokenizer.keyword()

        # obtenemos todos los identificadores
        while self._not_terminal_token_for('class_var_dec'):
            self.tokenizer.advance()

            if self.tokenizer.identifier():
                # agregamos los simbolos de clase
                symbol_name = self.tokenizer.identifier()
                self.class_symbol_table.define(
                    name=symbol_name,
                    kind=symbol_kind,
                    symbol_type=symbol_type
                )

    def compile_subroutine(self):
        #  nueva subrutina significa nuevo alcance
        self.subroutine_symbol_table.reset()
        # obtenemos el nombre de la subrutina
        self.tokenizer.advance()
        self.tokenizer.advance()
        subroutine_name = self.tokenizer.current_token.text

        # compilamos la lista de parametros
        self.tokenizer.advance()
        self.compile_parameter_list()

        # compilamos el cuerpo
        self.tokenizer.advance()
        self.compile_subroutine_body(subroutine_name=subroutine_name)

        # reset
        self.label_counter.reset_counts()

    def compile_subroutine_body(self, subroutine_name):
        # saltamos el inicio
        self.tokenizer.advance()
        # obtenemos todas las locales
        num_locals = 0
        while self._starting_token_for('var_dec'):
            num_locals += self.compile_var_dec()
            self.tokenizer.advance()

        # escribimos el comando de funcion
        self.vm_writer.write_function(
            name='{}.{}'.format(self.class_name, subroutine_name),
            num_locals=num_locals
        )

        # compilamos todas las declaraciones
        while self._not_terminal_token_for('subroutine'):
            self.compile_statements()

    def compile_parameter_list(self):
        # tabla de simbolos
        while self._not_terminal_token_for('parameter_list'):
            self.tokenizer.advance()

            if self.tokenizer.next_token.is_identifier():
                symbol_kind = self.SYMBOL_KINDS['parameter_list']
                symbol_type = self.tokenizer.current_token.text
                symbol_name = self.tokenizer.next_token.text
                self.subroutine_symbol_table.define(
                    name=symbol_name,
                    kind=symbol_kind,
                    symbol_type=symbol_type
                )

    def compile_var_dec(self):

        self.tokenizer.advance()

        symbol_type = self.tokenizer.current_token.text

        num_vars = 0

        # obtenemos todas las variables
        while self._not_terminal_token_for('var_dec'):
            self.tokenizer.advance()

            if self.tokenizer.identifier():
                num_vars += 1
                symbol_kind = self.SYMBOL_KINDS['var_dec']
                symbol_name = self.tokenizer.identifier()
                self.subroutine_symbol_table.define(
                    name=symbol_name,
                    kind=symbol_kind,
                    symbol_type=symbol_type
                )
        # return a las variables procesadas
        return num_vars

    def compile_statements(self):

        statement_compile_methods = {
            'if': self.compile_if,
            'do': self.compile_do,
            'let': self.compile_let,
            'while': self.compile_while,
            'return': self.compile_return
        }

        while self._not_terminal_token_for('subroutine'):
            if self.tokenizer.current_token.is_statement_token():
                statement_type = self.tokenizer.current_token.text
                statement_compile_methods[statement_type]()

            self.tokenizer.advance()

    def compile_do(self):

        self.tokenizer.advance()
        caller_name = self.tokenizer.current_token.text
        symbol = self._find_symbol_in_symbol_tables(symbol_name=caller_name)
        self.tokenizer.advance()
        self.tokenizer.advance()
        subroutine_name = self.tokenizer.current_token.text

        if symbol:
            segment = 'local'
            index = symbol['index']
            symbol_type = symbol['type']
            self.vm_writer.write_push(segment=segment, index=index)
        else:  # es decir llamada al os
            symbol_type = caller_name

        subroutine_call_name = symbol_type + '.' + subroutine_name
        # iniciamos la lista de expresion
        self.tokenizer.advance()
        # obtenemos los argumentos en la lista de expresion
        num_args = self.compile_expression_list()
        # method call
        if symbol:
            # llamando al objeto pasado como un argumento implicito
            num_args += 1
        # escribimos la llamada
        self.vm_writer.write_call(name=subroutine_call_name, num_args=num_args)
        self.vm_writer.write_pop(segment='temp', index='0')

    def compile_let(self):

        # obtener símbolo para almacenar evaluación de expresión
        self.tokenizer.advance()
        symbol_name = self.tokenizer.current_token.text
        symbol = self._find_symbol_in_symbol_tables(symbol_name=symbol_name)

        array_assignment = self._starting_token_for(keyword_token='array', position='next')
        if array_assignment:
            # llegar a la expresión de índice
            self.tokenizer.advance()
            self.tokenizer.advance()
            # lo compilamos
            self.compile_expression()
            self.vm_writer.write_push(segment=symbol['kind'], index=symbol['index'])
            self.vm_writer.write_arithmetic(command='+')

        while not self.tokenizer.current_token.text == '=':
            self.tokenizer.advance()
        # compila todas las expresiones
        while self._not_terminal_token_for('let'):
            self.tokenizer.advance()
            self.compile_expression()

        if not array_assignment:
            # almacenar evaluación de expresión en la ubicación del símbolo
            self.vm_writer.write_pop(segment=symbol['kind'], index=symbol['index'])
        else:

            self.vm_writer.write_pop(segment='temp', index='0')

            self.vm_writer.write_pop(segment='pointer', index='1')

            self.vm_writer.write_push(segment='temp', index='0')

            self.vm_writer.write_pop(segment='that', index='0')

    def compile_while(self):

        # escribimos la etiqueta while
        self.vm_writer.write_label(
            label='WHILE_EXP{}'.format(self.label_counter.get('while'))
        )

        # avanzar al inicio (
        self.tokenizer.advance()
        self.tokenizer.advance()

        # compilamos la expresion dentro ()
        self.compile_expression()

        # NOT expresión para manejar fácilmente la terminación y if-goto
        self.vm_writer.write_unary(command='~')
        self.vm_writer.write_ifgoto(
            label='WHILE_END{}'.format(self.label_counter.get('while'))
        )

        while self._not_terminal_token_for('while'):
            self.tokenizer.advance()

            if self._statement_token():
                self.compile_statements()

        # escribir el goto
        self.vm_writer.write_goto(
            label='WHILE_EXP{}'.format(self.label_counter.get('while'))
        )
        # escribimos el fin de la etiqueta
        self.vm_writer.write_label(
            label='WHILE_END{}'.format(self.label_counter.get('while'))
        )
        #  agregar while al contador de etiquetas
        self.label_counter.increment('while')

    def compile_if(self):
        # avanzamos a la expresion start
        self.tokenizer.advance()
        self.tokenizer.advance()
        # compilamos dentro ()
        self.compile_expression()
        self.vm_writer.write_ifgoto(label='IF_TRUE{}'.format(self.label_counter.get('if')))
        self.vm_writer.write_goto(label='IF_FALSE{}'.format(self.label_counter.get('if')))
        self.vm_writer.write_label(label='IF_TRUE{}'.format(self.label_counter.get('if')))
        self.compile_conditional_body()
        if self._starting_token_for(keyword_token='conditional', position='next'):
            self.tokenizer.advance()
            self.vm_writer.write_goto(
                label='IF_END{}'.format(self.label_counter.get('if'))
            )
            self.vm_writer.write_label(
                label='IF_FALSE{}'.format(self.label_counter.get('if'))
            )
            self.compile_conditional_body()
            self.vm_writer.write_label(
                label='IF_END{}'.format(self.label_counter.get('if'))
            )
        else:
            self.vm_writer.write_label(
                label='IF_FALSE{}'.format(self.label_counter.get('if'))
            )

    def compile_conditional_body(self):
        while self._not_terminal_token_for('if'):
            self.tokenizer.advance()

            if self._statement_token():
                if self.tokenizer.current_token.is_if():
                    self.label_counter.increment('if')
                    self.compile_statements()
                    self.label_counter.decrement('if')
                else:
                    self.compile_statements()

    def compile_expression(self):
        """
        many examples..i,e., x = 4
        """
        # las operaciones se compilan al final en orden inverso al que fueron agregadas
        ops = []

        while self._not_terminal_token_for('expression'):
            if self._subroutine_call():
                self.compile_subroutine_call()
            elif self._array_expression():
                self.compile_array_expression()
            elif self.tokenizer.current_token.text.isdigit():
                self.vm_writer.write_push(
                    segment='constant',
                    index=self.tokenizer.current_token.text
                )
            elif self.tokenizer.identifier():
                self.compile_symbol_push()
            elif self.tokenizer.current_token.is_operator() and not self._part_of_expression_list():
                ops.insert(0, Operator(token=self.tokenizer.current_token.text, category='bi'))
            elif self.tokenizer.current_token.is_unary_operator():
                ops.insert(0, Operator(token=self.tokenizer.current_token.text, category='unary'))
            elif self.tokenizer.string_const():
                self.compile_string_const()
            elif self.tokenizer.boolean():  # caso booleano
                self.compile_boolean()
            elif self._starting_token_for('expression'):  # expresión anidada
                # saltamos el inicial (
                self.tokenizer.advance()
                self.compile_expression()
            elif self.tokenizer.null():
                self.vm_writer.write_push(segment='constant', index=0)

            self.tokenizer.advance()

        for op in ops:
            self.compile_op(op)

    def compile_op(self, op):

        if op.unary():
            self.vm_writer.write_unary(command=op.token)
        elif op.multiplication():
            self.vm_writer.write_call(name='Math.multiply', num_args=2)
        elif op.division():
            self.vm_writer.write_call(name='Math.divide', num_args=2)
        else:
            self.vm_writer.write_arithmetic(command=op.token)

    def compile_boolean(self):
        """
        True o False
        """
        self.vm_writer.write_push(segment='constant', index=0)

        if self.tokenizer.boolean() == 'true':
            self.vm_writer.write_unary(command='~')

    def compile_string_const(self):

        string_length = len(self.tokenizer.string_const())
        self.vm_writer.write_push(segment='constant', index=string_length)
        self.vm_writer.write_call(name='String.new', num_args=1)
        # construir cadena a partir de caracteres
        for char in self.tokenizer.string_const():
            if not char == self.tokenizer.STRING_CONST_DELIMITER:
                ascii_value_of_char = ord(char)
                self.vm_writer.write_push(segment='constant', index=ascii_value_of_char)
                self.vm_writer.write_call(name='String.appendChar', num_args=2)

    def compile_symbol_push(self):

        symbol = self._find_symbol_in_symbol_tables(symbol_name=self.tokenizer.identifier())
        segment = symbol['kind']
        index = symbol['index']
        self.vm_writer.write_push(segment=segment, index=index)

    def compile_array_expression(self):

        symbol_name = self.tokenizer.current_token.text
        symbol = self._find_symbol_in_symbol_tables(symbol_name=symbol_name)
        # llegar a la expresión de índice
        self.tokenizer.advance()
        self.tokenizer.advance()
        # compilamos
        self.compile_expression()
        self.vm_writer.write_push(segment='local', index=symbol['index'])
        # agregar dos direcciones: identificador y resultado de expresión
        self.vm_writer.write_arithmetic(command='+')
        self.vm_writer.write_pop(segment='pointer', index=1)
        # agreamos el valor a la pila
        self.vm_writer.write_push(segment='that', index=0)

    def compile_subroutine_call(self):
        """
        example: Memory.peek(8000)
        """
        subroutine_name = ''

        while not self._starting_token_for('expression_list'):
            subroutine_name += self.tokenizer.current_token.text
            self.tokenizer.advance()
        # obtenemos el numero de argumentos
        num_args = self.compile_expression_list()
        # después de enviar argumentos a la pila
        self.vm_writer.write_call(name=subroutine_name, num_args=num_args)

    def compile_expression_list(self):

        num_args = 0

        if self._empty_expression_list():
            return num_args

        # iniciamos las expresiones
        self.tokenizer.advance()

        while self._not_terminal_token_for('expression_list'):
            num_args += 1
            self.compile_expression()
            if self._another_expression_coming():
                self.tokenizer.advance()
        return num_args

    def compile_return(self):
        if self._not_terminal_token_for(keyword_token='return', position='next'):
            self.compile_expression()
        else:
            self.vm_writer.write_push(segment='constant', index='0')
            self.tokenizer.advance()

        self.vm_writer.write_return()

    def _not_terminal_token_for(self, keyword_token, position='current'):
        if position == 'current':
            return not self.tokenizer.current_token.text in self.TERMINATING_TOKENS[keyword_token]
        elif position == 'next':
            return not self.tokenizer.next_token.text in self.TERMINATING_TOKENS[keyword_token]

    def _starting_token_for(self, keyword_token, position='current'):
        if position == 'current':
            return self.tokenizer.current_token.text in self.STARTING_TOKENS[keyword_token]
        elif position == 'next':
            return self.tokenizer.next_token.text in self.STARTING_TOKENS[keyword_token]

    def _statement_token(self):
        return self.tokenizer.current_token.is_statement_token()

    def _another_expression_coming(self):
        return self.tokenizer.current_token.is_expression_list_delimiter()

    def _find_symbol_in_symbol_tables(self, symbol_name):
        if self.subroutine_symbol_table.find_symbol_by_name(symbol_name):
            return self.subroutine_symbol_table.find_symbol_by_name(symbol_name)
        elif self.class_symbol_table.find_symbol_by_name(symbol_name):
            return self.class_symbol_table.find_symbol_by_name(symbol_name)

    def _empty_expression_list(self):
        return self._start_of_expression_list() and self._next_ends_expression_list()

    def _start_of_expression_list(self):
        return self.tokenizer.current_token.text in self.STARTING_TOKENS['expression_list']

    def _next_ends_expression_list(self):
        return self.tokenizer.next_token.text in self.TERMINATING_TOKENS['expression_list']

    def _subroutine_call(self):
        return self.tokenizer.identifier() and self.tokenizer.next_token.is_subroutine_call_delimiter()

    def _array_expression(self):
        return self.tokenizer.identifier() and self._starting_token_for(keyword_token='array', position='next')

    def _part_of_expression_list(self):
        return self.tokenizer.part_of_expression_list()
Esempio n. 4
0
    def compileTerm(self, operation=None):
        """
        Compile one term: a constant, a variable (optionally indexed or
        called), a parenthesised expression, or a unary operator applied to
        a term.

        :param operation: unary VM operation requested by an enclosing unary
            term; it is emitted after this term's own code.
        :raises RuntimeError: when the tokens do not form a term or a closing
            bracket is missing.
        """
        def is_keyword_constant():
            # a list, not a generator: every constant is probed, as before
            return any([self.words_exist([k]) for k in KEYWORD_CONSTANTS])

        def emit_pending_operation():
            # the unary operation requested by the caller applies to the
            # value this term leaves on the stack
            if operation and self.vm:
                self.compiled.write(
                    VMWriter.write_arithmetic(operation)
                )

        self.open_tag('term')
        if self.words_exist(['integerConstant']) or self.words_exist(['stringConstant']) or is_keyword_constant():
            self.format_and_write_line()
            if self.vm:
                value = self.get_xml_value()
                if value == 'true':
                    # true is produced as -(1); any pending operation is
                    # emitted separately instead of being overwritten
                    self.compiled.write(
                        VMWriter.write_push('constant', '1')
                    )
                    self.compiled.write(
                        VMWriter.write_arithmetic('neg')
                    )
                else:
                    if value == 'false' or value == 'null':
                        value = 0
                    self.compiled.write(
                        VMWriter.write_push('constant', value)
                    )
                emit_pending_operation()
            self.advance()
        elif self.words_exist(['identifier']):
            name = self.get_xml_value()
            kind = self.SYMBOL_TABLE.kind_of(name)
            index = self.SYMBOL_TABLE.index_of(name)
            self.format_and_write_line({'category': None, 'defined':False, 'kind':kind, 'index':index})
            self.advance()
            # THIS ONLY WORKS FOR SIMPLE IDENTIFIERS, should refactor for indexing arrays
            KIND_LOOKUP = {'static': 'static', 'field': 'this', 'arg': 'argument', 'var': 'local'}
            if kind is not None:
                self.compiled.write(
                    VMWriter.write_push(KIND_LOOKUP[kind], index)
                )
            # if there is a [ next
            if self.words_exist(['symbol', '[']):
                self.format_and_write_line()
                self.advance()
                self.compileExpression()
                if self.words_exist(['symbol', ']']):
                    self.format_and_write_line()
                    self.advance()
                else:
                    raise RuntimeError("expected ']' to close the array index")
            # if there is a ( next subroutine call, it will leave its value on the stack
            elif self.words_exist(['(']) or self.words_exist(['.']):
                self.compileSubroutineCall(identifier_compiled=True, identifier=name)
            # previously dropped: '-x' / '~x' compiled to a bare push of x
            emit_pending_operation()

        elif self.words_exist(['(', 'symbol']):
            self.format_and_write_line()
            self.advance()
            self.compileExpression()
            if self.words_exist([')', 'symbol']):
                self.format_and_write_line()
                self.advance()
            else:
                raise RuntimeError("expected ')' to close the expression")
            # previously dropped: '-(...)' compiled without the negation
            emit_pending_operation()
        elif self.words_exist(['-']) or self.words_exist(['~']):
            if self.words_exist(['-']):
                inner_operation = 'neg'
            else:
                inner_operation = '~'
            self.format_and_write_line()
            self.advance()
            # the inner term applies its own operator first; the operation
            # requested by the caller (nested unary) follows it
            self.compileTerm(operation=inner_operation)
            emit_pending_operation()
        else:
            raise RuntimeError('current token does not start a term')
        self.close_tag('term')
Esempio n. 5
0
class CompilationEngine:
    def __init__(self, inpath, outpath):
        """
        Compile the file at ``inpath`` and write the result to ``outpath``.

        The whole compilation runs from the constructor. The VM writer is
        closed even when compilation aborts with an exception; the
        completion message is printed only on success.

        :param inpath: path handed to the Tokenizer.
        :param outpath: path handed to the VMWriter.
        """
        self.tokenizer = Tokenizer(inpath)
        self.symboltable = SymbolTable()
        self.vmwriter = VMWriter(outpath)
        self._class_name = None
        try:
            if self.tokenizer.has_more_tokens():
                self.compile_class()
        finally:
            # do not leave the output open when compilation fails
            self.vmwriter.close()
        print("{0} completed.".format(outpath))

    def _subroutine_init(self):
        self._sub_kind = None
        self._sub_name = None
        self._ret_type = None

    def _advance(self):
        self._check_EOF()
        self.tokenizer.advance()

    @property
    def _current_token(self):
        """
        Value of the current token, read from the tokenizer attribute that
        matches its type; anything that is not a keyword, symbol, identifier
        or integer is read as a string value.
        """
        tokenizer = self.tokenizer
        t_type = tokenizer.token_type
        if t_type == T_KEYWORD:
            return tokenizer.keyword
        if t_type == T_SYMBOL:
            return tokenizer.symbol
        if t_type == T_ID:
            return tokenizer.identifier
        if t_type == T_INTEGER:
            return tokenizer.intval
        return tokenizer.stringval

    @property
    def _current_tok_type(self):
        return self.tokenizer.token_type

    @property
    def _current_tok_tag(self):
        """Entry of ``token_tags`` for the current token type."""
        kind = self._current_tok_type
        return token_tags[kind]

    @property
    def _next_token(self):
        """return raw next_token in the tokenizer"""
        return str(self.tokenizer.next_token)

    def _require_token(self, tok_type, token=None):
        """
        Advance the tokenizer and check the token that becomes current.

        :param tok_type: required token type; checked only when no specific
            token was requested or the requested token matched.
        :param token: optional exact token value that is required.
        :return: whatever ``_error`` returns on a mismatch, otherwise None.
        """
        self._advance()
        wrong_token = bool(token) and self._current_token != token
        if wrong_token:
            return self._error(expect_toks=(token, ))
        if self._current_tok_type != tok_type:
            return self._error(expect_types=(tok_type, ))
        return None

    def _require_id(self):
        """Require the next token to be of type T_ID (see _require_token)."""
        self._require_token(T_ID)

    def _require_kw(self, token):
        """
        Require the next token to be the keyword ``token``.

        :return: the result of ``_require_token``.
        """
        outcome = self._require_token(T_KEYWORD, token=token)
        return outcome

    def _require_sym(self, token):
        """
        Require the next token to be the symbol ``token``.

        :return: the result of ``_require_token``.
        """
        outcome = self._require_token(T_SYMBOL, token=token)
        return outcome

    def _require_brackets(self, brackets, procedure):
        front, back = brackets
        self._require_sym(front)
        procedure()
        self._require_sym(back)

    def _fol_by_class_vardec(self):
        """Return whether the next token is KW_STATIC or KW_FIELD."""
        upcoming = self._next_token
        return upcoming in (KW_STATIC, KW_FIELD)

    def _fol_by_subroutine(self):
        """Return whether the next token is KW_CONSTRUCTOR, KW_FUNCTION or KW_METHOD."""
        upcoming = self._next_token
        return upcoming in (KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _fol_by_vardec(self):
        """Return whether the next token is KW_VAR."""
        upcoming = self._next_token
        return upcoming == KW_VAR

    #########################
    # structure compilation #
    #########################

    def compile_class_name(self):
        """Require an identifier and remember it as the class name."""
        self._require_id()
        class_name = self._current_token
        self._class_name = class_name

    def compile_subroutine_name(self):
        """Require an identifier and remember it as the subroutine name."""
        self._require_id()
        subroutine_name = self._current_token
        self._sub_name = subroutine_name

    def compile_var_name(self, kind=None, type=None, declare=False):
        """
        Require an identifier and either declare it or check its use.

        :param kind: symbol kind, used when declaring.
        :param type: symbol type; when declaring it is stored, otherwise it
            is passed on to ``check_var_name``.
        :param declare: only the literal ``True`` selects declaration.
        """
        self._require_id()
        name = self._current_token
        if declare is not True:
            self.check_var_name(name, type)
            return
        # declaring: kind and type are expected to be set by the caller
        self.symboltable.define(name, type, kind)

    def check_var_name(self, name, type=None):
        """
        Check that a used name was declared and, optionally, has a given type.

        :param name: identifier being used.
        :param type: when not None, the type the symbol table must hold for
            ``name``.
        """
        if self.symboltable.kindof(name) is None:
            self._traceback('name used before declared: {0}'.format(name))
            return
        if type is None:
            return
        recorded_type = self.symboltable.typeof(name)
        if recorded_type == type:
            return
        found = '{0} "{1}"'.format(recorded_type, name)
        self._error(expect_types=(type, ), get=found)

    def compile_type(self, advanced=False, expect='type'):
        """
        Check that the current token is a type: one of
        ``SymbolTable.builtIn_types`` or an identifier (a class name).

        :param advanced: when exactly False, the tokenizer is advanced first.
        :param expect: description passed to ``_error`` on failure.
        :return: the result of ``_error`` on failure, otherwise None.
        """
        if advanced is False:
            self._advance()
        if self._current_token in SymbolTable.builtIn_types:
            return None
        if self._current_tok_type == T_ID:
            return None
        return self._error(expect=expect)

    def compile_return_type(self):
        """
        Read the return type of a subroutine (KW_VOID or a type) and record it.

        A constructor whose return type is not the current class is reported
        through ``_traceback``.
        """
        self._advance()
        is_void = self._current_token == KW_VOID
        if not is_void:
            self.compile_type(True, '"void" or type')
        self._ret_type = self._current_token
        if self._sub_kind != KW_CONSTRUCTOR:
            return
        if self._ret_type != self._class_name:
            me = 'constructor expect current class as return type'
            self._traceback(me)

    @record_non_terminal('class')
    def compile_class(self):
        """
        Compile ``'class' className '{' classVarDec* subroutineDec* '}'``.

        Anything other than '}' after the declarations, or any token after
        the closing '}', is reported through ``_traceback``.
        """
        self._require_kw(KW_CLASS)
        self.compile_class_name()
        self._require_sym('{')
        while self._fol_by_class_vardec():
            self.compile_class_vardec()
        while self._fol_by_subroutine():
            self.compile_subroutine()
        self._advance()
        if self._current_token != '}':
            self._traceback("Expect classVarDec first, subroutineDec second.")
        if self.tokenizer.has_more_tokens():
            # report the more specific diagnostic only
            if self._next_token == KW_CLASS:
                self._traceback('Only expect one classDec.')
            else:
                self._traceback('Unexpected extra tokens.')

    def compile_declare(self):
        """
        Compile ``kind type varName (',' varName)* ';'`` where kind is the
        keyword that introduces the declaration (static, field or var).

        Every name is defined in the symbol table with that kind and type.

        :return: the result of ``_error`` when the declaration does not end
            with ';', otherwise None.
        """
        self._advance()
        id_kind = self._current_token  # ('static | field | var')
        # type varName (',' varName)* ';'
        self.compile_type()
        id_type = self._current_token
        self.compile_var_name(id_kind, id_type, declare=True)
        # compile ',' or ';'
        self._advance()
        while self._current_token == ',':
            self.compile_var_name(id_kind, id_type, declare=True)
            self._advance()
        if self._current_token != ';':
            # name the argument, as the other _error call sites do, so the
            # expected tokens cannot be mistaken for another parameter
            return self._error(expect_toks=(',', ';'))

    @record_non_terminal('classVarDec')
    def compile_class_vardec(self):
        """Compile one class-level variable declaration via compile_declare."""
        # ('static|field') type varName (',' varName)* ';'
        self.compile_declare()

    @record_non_terminal('subroutineDec')
    def compile_subroutine(self):
        """
        Compile ``('constructor'|'function'|'method') ('void'|type)
        subroutineName '(' parameterList ')' subroutineBody``.

        For a method, 'this' is defined as the first argument before the
        declared parameters.
        """
        self._subroutine_init()
        self.symboltable.start_subroutine()
        self._advance()
        sub_kind = self._current_token
        self._sub_kind = sub_kind
        if sub_kind == KW_METHOD:
            # the receiver takes the first argument slot
            self.symboltable.define('this', self._class_name, 'argument')
        self.compile_return_type()
        self.compile_subroutine_name()
        self._require_brackets('()', self.compile_parameter_list)
        self.compile_subroutine_body()

    @record_non_terminal('parameterList')
    def compile_parameter_list(self):
        """
        Compile ``((type varName) (',' type varName)*)?``, declaring each
        parameter as an 'argument' symbol. The closing ')' is not consumed.
        """
        first = True
        while self._next_token != ')':
            # every parameter but the first is preceded by a comma
            if not first:
                self._require_sym(',')
            first = False
            self.compile_type()
            self.compile_var_name('argument', self._current_token, True)

    @record_non_terminal('subroutineBody')
    def compile_subroutine_body(self):
        """Compile a subroutine body: local declarations, then statements."""
        # '{' varDec* statements '}'
        self._require_sym('{')
        while self._fol_by_vardec():
            self.compile_vardec()
        # the function command is written after all var declarations are
        # compiled and before any statement
        self.compile_function()
        self.compile_statements()
        self._require_sym('}')

    def compile_function(self):
        """
        Write the VM function command and set up ``pointer 0``.

        A constructor allocates one word per field through Memory.alloc; a
        method takes its object from argument 0.
        """
        writer = self.vmwriter
        fn_name = '.'.join([self._class_name, self._sub_name])
        # function fn_name num_locals
        writer.write_function(fn_name, self.symboltable.varcount(KW_VAR))

        kind = self._sub_kind
        if kind == KW_CONSTRUCTOR:
            # allocate the object and make it the current one
            field_count = self.symboltable.varcount(KW_FIELD)
            writer.write_push('constant', field_count)
            writer.write_call('Memory.alloc', 1)
            writer.write_pop('pointer', 0)
        elif kind == KW_METHOD:
            # the receiver arrives as argument 0
            writer.write_push('argument', 0)
            writer.write_pop('pointer', 0)

    @record_non_terminal('varDec')
    def compile_vardec(self):
        """Compile one local variable declaration via compile_declare."""
        # 'var' type varName (',' varName)* ';'
        self.compile_declare()

    #########################
    # statement compilation #
    #########################

    @record_non_terminal('statements')
    def compile_statements(self):
        """
        Compile ``(letStatement | ifStatement | whileStatement | doStatement
        | returnStatement)*`` until the next token is '}'.

        :return: the result of ``_error`` when a token that starts no
            statement is met, otherwise None.
        """
        handlers = {
            'do': self.compile_do,
            'let': self.compile_let,
            'while': self.compile_while,
            'return': self.compile_return,
            'if': self.compile_if,
        }
        while self._next_token != '}':
            self._advance()
            handler = handlers.get(self._current_token)
            if handler is None:
                return self._error(expect='statement expression')
            handler()

    @record_non_terminal('doStatement')
    def compile_do(self):
        """Compile a do statement; the value left by the call is popped into temp 0."""
        # 'do' subroutineCall ';'
        self._advance()
        self.compile_subroutine_call()
        self.vmwriter.write_pop('temp', 0)  # temp[0] store useless value
        self._require_sym(';')

    @record_non_terminal('letStatement')
    def compile_let(self):
        """
        Compile ``'let' varName ('[' expression ']')? '=' expression ';'``.

        For an array target, the element address is computed before the
        right-hand side; the value is then parked in temp 1 while 'that' is
        aimed at the element.
        """
        self.compile_var_name()
        target = self._current_token
        is_array = self._next_token == '['
        if is_array:
            # push (array base + subscript)
            self.compile_array_subscript(target)
        self._require_sym('=')
        self.compile_expression()  # push expression value
        self._require_sym(';')
        if not is_array:
            self.assign_variable(target)
            return
        writer = self.vmwriter
        writer.write_pop('temp', 1)  # pop exp value to temp[1]
        writer.write_pop('pointer', 1)  # that = array base + subscript
        writer.write_push('temp', 1)
        writer.write_pop('that', 0)

    # Maps a symbol-table kind to the VM memory segment used when a variable
    # of that kind is pushed or popped (see assign_variable / load_variable).
    kind_segment = {
        'static': 'static',
        'field': 'this',
        'argument': 'argument',
        'var': 'local'
    }

    def assign_variable(self, name):
        """
        Pop the top of the stack into the variable ``name``.

        :param name: a name known to the symbol table.
        """
        table = self.symboltable
        kind, index = table.kindof(name), table.indexof(name)
        self.vmwriter.write_pop(self.kind_segment[kind], index)

    def load_variable(self, name):
        """Push the value of the variable called *name* onto the stack.

        The variable's kind and index come from the symbol table; the kind
        is translated to a VM segment through ``kind_segment``.
        """
        table = self.symboltable
        kind, index = table.kindof(name), table.indexof(name)
        segment = self.kind_segment[kind]
        self.vmwriter.write_push(segment, index)

    # Counter that gives every while/if statement its own label suffix.
    # It starts out as a class attribute; the `self.label_num += 1` in the
    # statement compilers rebinds it on the instance from then on.
    label_num = 0

    @record_non_terminal('whileStatement')
    def compile_while(self):
        """Compile ``'while' '(' expression ')' '{' statements '}'``.

        Emits the loop-top label and then delegates to
        ``compile_cond_expression``, which jumps back to that label after
        the body and places the exit label behind it.
        """
        suffix = str(self.label_num)
        self.label_num += 1
        loop_top = 'WHILE_START_' + suffix
        loop_exit = 'WHILE_END_' + suffix
        self.vmwriter.write_label(loop_top)
        self.compile_cond_expression(loop_top, loop_exit)

    @record_non_terminal('ifStatement')
    def compile_if(self):
        """Compile an if statement with an optional else clause.

        Grammar: ``'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?``

        The true branch ends with a jump to the end label; the else label is
        placed right behind it, so a false condition lands on the else body
        (or directly on the end label when there is no else clause).
        """
        suffix = str(self.label_num)
        self.label_num += 1
        false_branch = 'IF_ELSE_' + suffix
        join_point = 'IF_END_' + suffix
        self.compile_cond_expression(join_point, false_branch)
        has_else = self._next_token == KW_ELSE
        if has_else:
            self._require_kw(KW_ELSE)
            self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_label(join_point)

    def compile_cond_expression(self, goto_label, end_label):
        """Compile ``'(' expression ')' '{' statements '}'`` with jumps.

        The condition is negated, so control jumps to *end_label* when it is
        false. After the body an unconditional jump to *goto_label* is
        emitted, followed by the *end_label* label itself.
        """
        emit = self.vmwriter
        self._require_brackets('()', self.compile_expression)
        emit.write_arithmetic('not')
        emit.write_if(end_label)  # condition false -> skip the body
        self._require_brackets('{}', self.compile_statements)
        emit.write_goto(goto_label)
        emit.write_label(end_label)

    @record_non_terminal('returnStatement')
    def compile_return(self):
        """Compile ``'return' expression? ';'``.

        * In a constructor the statement must be ``return this`` and the
          object address (``pointer 0``) is pushed.
        * Otherwise, if an expression follows, its value is the result.
        * A bare ``return;`` pushes ``constant 0``; it is reported as an
          error when the subroutine's return type is not void.
        """
        in_constructor = self._sub_kind == KW_CONSTRUCTOR
        if in_constructor:
            self._require_kw(KW_THIS)  # constructor must return 'this'
            self.vmwriter.write_push('pointer', 0)
        elif self._next_token == ';':
            if self._ret_type != KW_VOID:
                self._traceback('expect return ' + self._ret_type)
            self.vmwriter.write_push('constant', 0)
        else:
            self.compile_expression()
        self._require_sym(';')
        self.vmwriter.write_return()

    ##########################
    # expression compilation #
    ##########################

    # Unary operator symbol -> VM arithmetic command.
    unary_ops = {'-': 'neg', '~': 'not'}
    # Binary operator symbol -> VM arithmetic command. '*' and '/' map to
    # None because compile_binaryop emits calls to Math.multiply and
    # Math.divide for them instead of an arithmetic command; they are still
    # listed so that compile_expression recognises them as operators.
    binary_ops = {
        '+': 'add',
        '-': 'sub',
        '*': None,
        '/': None,
        '&': 'and',
        '|': 'or',
        '<': 'lt',
        '>': 'gt',
        '=': 'eq'
    }

    @record_non_terminal('expression')
    def compile_expression(self):
        """Compile ``term (op term)*``.

        Operators are applied strictly left to right as they are read: each
        right-hand term is compiled and the operator is emitted immediately
        afterwards.
        """
        self.compile_term()
        while True:
            if self._next_token not in self.binary_ops:
                break
            self._advance()
            if self._current_tok_type != T_SYMBOL:
                self._error(expect_types=(T_SYMBOL, ))
            operator = self._current_token
            self.compile_term()
            self.compile_binaryop(operator)

    def compile_binaryop(self, op):
        """Emit the VM code that applies binary operator *op*.

        Multiplication and division become two-argument calls to
        ``Math.multiply`` / ``Math.divide``; every other operator is emitted
        as the arithmetic command listed in ``binary_ops``.
        """
        os_routine = {'*': 'Math.multiply', '/': 'Math.divide'}.get(op)
        if os_routine is None:
            self.vmwriter.write_arithmetic(self.binary_ops[op])
        else:
            self.vmwriter.write_call(os_routine, 2)

    # Keywords that compile_term accepts as a term on their own
    # (the grammar's keywordConstant).
    kw_consts = (KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    @record_non_terminal('term')
    def compile_term(self):
        """Compile a single term and leave its value on the stack.

        Grammar: ``integerConstant | stringConstant | keywordConstant |
        varName | varName '[' expression ']' | subroutineCall |
        '(' expression ')' | unaryOp term``

        Anything that matches none of these alternatives is reported
        through ``_error``.
        """
        if self._next_token == '(':
            self._require_brackets('()', self.compile_expression)
            return

        self._advance()
        tok = self._current_token
        tok_type = self._current_tok_type
        if tok_type == T_KEYWORD and tok in self.kw_consts:
            self.compile_kw_consts(tok)
        elif tok_type == T_INTEGER:
            self.vmwriter.write_push('constant', tok)
        elif tok_type == T_STRING:
            self.compile_string(tok)
        elif tok_type == T_ID:
            lookahead = self._next_token
            # Compare against the two symbols explicitly: a substring test
            # such as `lookahead in '(.'` would also accept '' and would
            # raise TypeError for a None lookahead.
            if lookahead in ('(', '.'):
                self.compile_subroutine_call()
            elif lookahead == '[':
                self.check_var_name(tok)
                self.compile_array_subscript(tok)  # pushes base + subscript
                self.vmwriter.write_pop('pointer', 1)
                self.vmwriter.write_push('that', 0)
            else:
                self.check_var_name(tok)
                self.load_variable(tok)
        elif tok_type == T_SYMBOL and tok in self.unary_ops:
            # Operand first, then the unary command that consumes it.
            self.compile_term()
            self.vmwriter.write_arithmetic(self.unary_ops[tok])
        else:
            self._error(expect='term')

    # keywordConstant: 'true' | 'false' | 'null' | 'this'
    def compile_kw_consts(self, kw):
        """Push the value of keyword constant *kw*.

        ``this`` pushes ``pointer 0``; ``true`` pushes ``constant 1``
        followed by ``neg``; any other keyword pushes ``constant 0``.
        """
        emit = self.vmwriter
        if kw == KW_THIS:
            emit.write_push('pointer', 0)
            return
        if kw == KW_TRUE:
            emit.write_push('constant', 1)
            emit.write_arithmetic('neg')
            return
        emit.write_push('constant', 0)

    def compile_string(self, string):
        """Emit code that builds *string* at run time.

        Allocates the object with ``String.new(len(string))`` and then
        appends the characters one by one with ``String.appendChar``.
        """
        emit = self.vmwriter
        emit.write_push('constant', len(string))
        emit.write_call('String.new', 1)
        for code_point in map(ord, string):
            emit.write_push('constant', code_point)
            emit.write_call('String.appendChar', 2)

    def compile_subroutine_call(self):
        """Compile a subroutine call whose first identifier is current.

        Grammar: ``subroutineName '(' expressionList ')' |
        (className | varName) '.' subroutineName '(' expressionList ')'``

        ``compile_call_name`` resolves the VM function name and reports
        whether an implicit first argument was already pushed; the explicit
        arguments are counted on top of that.
        """
        target, implicit_args = self.compile_call_name()
        self._require_sym('(')
        total_args = self.compile_expressionlist(implicit_args)
        self._require_sym(')')
        self.vmwriter.write_call(target, total_args)

    def compile_call_name(self):
        """Resolve the callee of a subroutine call.

        The current token must be the first identifier of the call. It is a
        className or varName when followed by ``.``, or a subroutineName of
        the class being compiled when followed by ``(``.

        :return: ``(vm_function_name, num_args)`` where ``num_args`` is 1
            when the receiving object has already been pushed as the first
            argument and 0 otherwise.

        Any other lookahead is reported through ``_error`` instead of
        silently returning ``None`` (which the caller cannot unpack).
        """
        if self._current_tok_type != T_ID:
            self._error(expect_types=(T_ID, ))
        name = self._current_token

        if self._next_token == '.':
            self._require_sym('.')
            self.compile_subroutine_name()
            sub_name = self._current_token
            if (name in self.symboltable.all_class_types()
                    or name in SymbolTable.builtIn_class
                    or name == self._class_name):
                return '.'.join((name, sub_name)), 0  # className
            self.check_var_name(name)  # varName with class type
            var_type = self.symboltable.typeof(name)
            if var_type in SymbolTable.builtIn_types:
                return self._error(expect='class instance or class',
                                   get=var_type)
            self.load_variable(name)  # the object is the first argument
            return '.'.join((var_type, sub_name)), 1

        if self._next_token == '(':
            self.vmwriter.write_push('pointer', 0)  # push this to be 1st arg
            return '.'.join((self._class_name, name)), 1  # subroutineName

        return self._error(expect_toks=('.', '('), get=self._next_token)

    @record_non_terminal('expressionList')
    def compile_expressionlist(self, num_args):
        """Compile ``(expression (',' expression)*)?`` up to the ``)``.

        :param num_args: number of arguments already pushed by the caller.
        :return: *num_args* plus the number of expressions compiled here.
        """
        total = num_args
        need_comma = False
        while self._next_token != ')':
            if need_comma:
                self._require_sym(',')
            self.compile_expression()
            total += 1
            need_comma = True
        return total

    def compile_array_subscript(self, var_name):
        """Compile ``'[' expression ']'`` for the array variable *var_name*.

        Leaves the element address (subscript + array base) on the stack.
        The variable is first checked with ``check_var_name(..., 'Array')``.
        """
        self.check_var_name(var_name, 'Array')
        subscript = self.compile_expression
        self._require_brackets('[]', subscript)  # pushes the subscript value
        self.load_variable(var_name)             # pushes the array base
        self.vmwriter.write_arithmetic('add')

    def _check_EOF(self):
        """Report "Unexpected EOF." when the tokenizer has no more tokens."""
        if self.tokenizer.has_more_tokens():
            return
        self._traceback("Unexpected EOF.")

    def _error(self, expect_toks=(), expect_types=(), expect=None, get=None):
        """Build an "Expect ... but get ..." message and raise it.

        :param expect_toks: literal tokens that would have been accepted.
        :param expect_types: token-type ids that would have been accepted;
            each is rendered as ``type <token_tags[id]>``.
        :param expect: ready-made description of what was expected; when
            given, *expect_toks* and *expect_types* are ignored.
        :param get: what was found instead; defaults to the current token.
        :return: whatever ``_traceback`` returns (it raises in practice).
        """
        if expect is None:
            # Tokens first, then types, all joined with ' or '. Collecting
            # them in one list avoids calling str.join with two arguments,
            # which raised TypeError when both kinds were supplied.
            alternatives = ['"{0}"'.format(t) for t in expect_toks]
            alternatives.extend('type {0}'.format(token_tags[t])
                                for t in expect_types)
            expect = ' or '.join(alternatives)
        if get is None:
            get = self._current_token
        me = 'Expect {0} but get "{1}"'.format(expect, get)
        return self._traceback(me)

    def _traceback(self, message):
        """Raise ``CompileError`` for *message*, prefixed with the location.

        The location is the tokenizer's file name and current line count.
        When ``DEBUG`` is set, the symbol table and the known class types
        are printed first.
        """
        if DEBUG:
            rule = '--------------------------------------------'
            print(rule)
            print(self.symboltable)
            print(self.symboltable.all_class_types())
            print(rule)
        source = self.tokenizer
        file_info = 'file: "{0}"'.format(source.filename)
        line_info = 'line {0}'.format(source.line_count)
        full_message = "{0}, {1}: {2}".format(file_info, line_info, message)
        raise CompileError(full_message)
Esempio n. 6
0
class CompilationEngine:
    """Recursive-descent compilation engine for a single Jack class.

    Constructing an instance runs the whole compilation: tokens are pulled
    from a ``Tokenizer`` and all output goes through a ``VMWriter``.

    Declarations and ``let`` statements already emit VM commands, while the
    remaining statement and expression routines still emit XML-style markup
    through the writer's ``write`` method.
    """

    XML_LINE = "<{0}> {1} </{0}>\n"
    COMPARE_SYM_REPLACER = {
        '<': "&lt;",
        '>': "&gt;",
        '"': "&quot;",
        '&': "&amp;"
    }
    KEYWORD_CONSTANT = ("true", "false", "null", "this")

    def __init__(self, input_stream, output_stream):
        """
        Build the tokenizer, writer and symbol table, then compile the class.
        :param input_stream: the input stream
        :param output_stream: the output stream
        """
        self.__tokenizer = Tokenizer(input_stream)
        self.__output = VMWriter(output_stream)
        self.__symbol = SymbolTable()
        self.__class_name = ""
        # Statement keyword -> routine that compiles that statement.
        self.__statements = {
            "let": self.compile_let,
            "if": self.compile_if,
            "while": self.compile_while,
            "do": self.compile_do,
            "return": self.compile_return
        }
        # compile_class() closes the output itself once it is done.
        self.compile_class()

    def __write_and_advance(self):
        """
        write the current token as an xml line, then move to the next token
        """
        self.write_xml()
        self.__tokenizer.advance()

    def write_xml(self):
        """
        write the current token as one xml line; string constants use their
        string value and the symbols in COMPARE_SYM_REPLACER are escaped
        """
        tok = self.__tokenizer
        tag = tok.token_type()
        if tag == "stringConstant":
            text = tok.string_val()
        else:
            raw = tok.get_value()
            text = self.COMPARE_SYM_REPLACER.get(raw, raw)
        self.__output.write(self.XML_LINE.format(tag, text))

    def compile_class(self):
        """
        compile the whole class: record its name, then the class variable
        declarations, then the subroutines, and finally close the output
        """
        tok = self.__tokenizer
        tok.advance()  # skip "class"
        self.__class_name = tok.get_value()
        tok.advance()  # skip class name
        tok.advance()  # skip {
        while tok.get_value() in ("static", "field"):
            self.compile_class_var_dec()
        while tok.get_value() in ("constructor", "function", "method"):
            self.compile_subroutine_dec()
        self.__output.close()

    def compile_class_var_dec(self):
        """
        compile consecutive static / field declarations, defining every
        declared name in the symbol table
        """
        tok = self.__tokenizer
        table = self.__symbol
        out = self.__output
        kind = tok.get_value()
        while kind in ("static", "field"):
            slot = table.var_count(kind)
            tok.advance()  # onto the type
            declared_type = tok.get_value()
            out.write_push(kind, slot)
            tok.advance()  # onto the first name
            table.define(tok.get_value(), declared_type, kind)
            tok.advance()
            while tok.get_value() == ",":
                tok.advance()  # onto the next name
                name = tok.get_value()
                out.write_push(kind, table.var_count(kind))
                table.define(name, declared_type, kind)
                tok.advance()
            tok.advance()  # skip ;
            kind = tok.get_value()

    def compile_subroutine_body(self):
        """
        compile a subroutine body: local declarations, then the statements
        """
        tok = self.__tokenizer
        tok.advance()  # skip {
        while tok.get_value() == "var":
            self.compile_var_dec()
        self.compile_statements()
        tok.advance()  # skip }

    def compile_subroutine_dec(self):
        """
        compile one constructor / function / method declaration
        """
        tok = self.__tokenizer
        tok.advance()  # skip constructor/function/method
        return_type = tok.get_value()
        tok.advance()
        name = tok.get_value()
        tok.advance()
        n_params = self.compile_parameter_list()
        self.__output.write_function(name, n_params)
        self.compile_subroutine_body()
        if return_type == "void":
            self.__output.write_pop("temp", "0")

    def compile_parameter_list(self):
        """
        skip over a parenthesised parameter list
        :return: the number of parameters it declares
        """
        tok = self.__tokenizer
        tok.advance()  # skip (
        n_params = 0
        if tok.get_value() != ")":
            while True:
                tok.advance()  # skip type
                tok.advance()  # skip var name
                n_params += 1
                if tok.get_value() != ",":
                    break
                tok.advance()  # skip ,
        tok.advance()  # skip )
        return n_params

    def compile_var_dec(self):
        """
        compile one local variable declaration, defining every declared
        name in the symbol table
        """
        tok = self.__tokenizer
        table = self.__symbol
        out = self.__output
        kind = tok.get_value()  # "var"
        tok.advance()
        declared_type = tok.get_value()
        tok.advance()
        name = tok.get_value()
        tok.advance()
        out.write_push(kind, table.var_count(kind))
        table.define(name, declared_type, kind)
        while tok.get_value() == ",":
            tok.advance()  # skip ,
            name = tok.get_value()
            out.write_push(kind, table.var_count(kind))
            table.define(name, declared_type, kind)
            tok.advance()
        tok.advance()  # skip ;

    def compile_statements(self):
        """
        compile statements for as long as the current token is a statement
        keyword
        """
        tok = self.__tokenizer
        keyword = tok.get_value()
        if keyword == "}":
            return
        handlers = self.__statements
        while keyword in handlers:
            handlers[keyword]()
            keyword = tok.get_value()

    def compile_do(self):
        """
        compile a do statement
        """
        self.__tokenizer.advance()  # skip do
        self.subroutine_call()
        self.__tokenizer.advance()  # skip ;

    def compile_let(self):
        """
        compile a let statement: the value of the expression is popped into
        the target variable
        """
        tok = self.__tokenizer
        tok.advance()  # skip let
        target = tok.get_value()
        tok.advance()
        # todo handle array targets (varName '[' expression ']')
        tok.advance()  # skip =
        self.compile_expression()  # todo push the value to the stack
        tok.advance()  # skip ;
        table = self.__symbol
        kind = table.kind_of(target)
        slot = table.index_of(target)
        self.__output.write_pop(kind, slot)

    def compile_while(self):
        """
        compile a while loop (xml output)
        """
        out = self.__output
        out.write("<whileStatement>\n")
        self.__write_and_advance()  # while
        self.__write_and_advance()  # (
        self.compile_expression()
        self.__write_and_advance()  # )
        self.__write_and_advance()  # {
        self.compile_statements()
        self.__write_and_advance()  # }
        out.write("</whileStatement>\n")

    def compile_return(self):
        """
        compile a return statement (xml output)
        """
        out = self.__output
        out.write("<returnStatement>\n")
        self.__write_and_advance()  # return
        if self.__tokenizer.get_value() != ";":
            self.compile_expression()
        self.__write_and_advance()  # ;
        out.write("</returnStatement>\n")

    def compile_if(self):
        """
        compile an if statement with its optional else clause (xml output)
        """
        out = self.__output
        out.write("<ifStatement>\n")
        self.__write_and_advance()  # if
        self.__write_and_advance()  # (
        self.compile_expression()
        self.__write_and_advance()  # )
        self.__write_and_advance()  # {
        self.compile_statements()
        self.__write_and_advance()  # }
        if self.__tokenizer.get_value() == "else":
            self.__write_and_advance()  # else
            self.__write_and_advance()  # {
            self.compile_statements()
            self.__write_and_advance()  # }
        out.write("</ifStatement>\n")

    def compile_expression(self):
        """
        compile an expression: a term followed by any number of
        operator / term pairs (xml output)
        """
        out = self.__output
        out.write("<expression>\n")
        self.compile_term()
        while self.__tokenizer.is_operator():
            self.__write_and_advance()  # the operator
            self.compile_term()
        out.write("</expression>\n")

    def compile_term(self):
        """
        compile a single term of any kind (xml output); a token that is not
        recognised produces an empty term element
        """
        tok = self.__tokenizer
        out = self.__output
        out.write("<term>\n")
        kind = tok.token_type()
        if kind in ("integerConstant", "stringConstant"):
            self.__write_and_advance()  # the int \ string
        elif kind == "keyword" and tok.get_value() in self.KEYWORD_CONSTANT:
            tok.set_type("keywordConstant")
            self.__write_and_advance()  # the keyword
        elif kind == "identifier":
            if tok.get_next_token() in ("(", "."):
                # function call
                self.subroutine_call()
            else:
                self.__write_and_advance()  # the var name
                if tok.get_value() == "[":
                    self.__write_and_advance()  # [
                    self.compile_expression()
                    self.__write_and_advance()  # ]
        elif kind == "symbol" and tok.get_value() == "(":
            self.__write_and_advance()  # (
            self.compile_expression()
            self.__write_and_advance()  # )
        elif tok.get_value() in ("-", "~"):
            self.__write_and_advance()  # - \ ~
            self.compile_term()
        out.write("</term>\n")

    def subroutine_call(self):
        """
        compile a subroutine call, with or without a "name." prefix
        (xml output)
        """
        if self.__tokenizer.get_next_token() == ".":
            self.__write_and_advance()  # class \ var name
            self.__write_and_advance()  # .
        self.__write_and_advance()  # subroutine name
        self.__write_and_advance()  # (
        self.compile_expression_list()
        self.__write_and_advance()  # )

    def compile_expression_list(self):
        """
        compile a comma separated, possibly empty, list of expressions
        (xml output)
        """
        out = self.__output
        out.write("<expressionList>\n")
        if self.__tokenizer.get_value() != ")":
            self.compile_expression()
            while self.__tokenizer.get_value() == ",":
                self.__write_and_advance()  # ,
                self.compile_expression()
        out.write("</expressionList>\n")
Esempio n. 7
0
class CompilationEngine:

    # NOTE(review): not read anywhere in the visible part of the class;
    # presumably gates the print_open/print_close tracing — confirm.
    DEBUG = False

    # Lookup table from Jack source vocabulary to VM vocabulary:
    #   * binary operator symbols -> VM arithmetic commands,
    #   * 'unary-' / 'unary~'     -> the unary VM commands,
    #   * variable kinds          -> VM memory segments,
    #   * '*' and '/'             -> the Math routines implementing them.
    translate_dict = {
        '+': 'add',
        '-': 'sub',
        '=': 'eq',
        '>': 'gt',
        '<': 'lt',
        '&': 'and',
        '|': 'or',
        'unary-': 'neg',
        'unary~': 'not',
        'argument': 'argument',
        'static': 'static',
        'var': 'local',
        'field': 'this',
        '*': 'Math.multiply',
        '/': 'Math.divide'
    }

    def __init__(self, input: JackTokenizer, output_file_path):
        """Set up the collaborators and the parser state.

        :param input: tokenizer that supplies the tokens of the class.
        :param output_file_path: path handed to the ``VMWriter``.

        Nothing is compiled here; compilation starts with
        ``compile_class``.
        """
        self.tokenizer = input
        self.vmwriter = VMWriter(output_file_path)
        self.symbol_table = SymbolTable()

        # Cursor state: the token under the cursor and its type.
        self.curr_token, self.curr_token_type = '', ''
        # label_index feeds the generated labels.
        # NOTE(review): depth is presumably the nesting level used by the
        # print_open/print_close tracing — confirm.
        self.label_index = self.depth = 0

    def compile_class(self):
        """Compile ``'class' className '{' classVarDec* subroutineDec* '}'``.

        Records the class name, compiles all class-level variable
        declarations followed by all subroutines, and closes the VM writer.
        """
        self.print_open('compile_class')
        for _ in range(2):
            self.__next_token()  # class, then className
        self.class_name = self.curr_token
        for _ in range(2):
            self.__next_token()  # {, then the first token of the body

        while self.curr_token in ('static', 'field'):
            self.compile_class_var_dec()

        while self.curr_token in ('constructor', 'function', 'method'):
            self.compile_subroutine_dec()

        self.__next_token()  # after }
        self.vmwriter.close()
        self.print_close('compile_class_end')

    def compile_class_var_dec(self):
        """Compile ``('static'|'field') type varName (',' varName)* ';'``.

        Every declared name is defined in the symbol table under the segment
        that ``translate_dict`` gives for the declaration keyword. On entry
        the current token is that keyword; on exit it is the token after
        the closing ``;``.
        """
        self.print_open('compile_class_var_dec')
        declared_kind = self.curr_token  # (static|field)

        self.__next_token()
        declared_type = self.curr_token

        self.__next_token()
        first_name = self.curr_token
        segment = self.translate_dict[declared_kind]
        self.symbol_table.define(first_name, declared_type, segment)

        self.__next_token()  # , or ;
        while self.curr_token != ';':
            self.__next_token()  # onto the next varName
            self.symbol_table.define(self.curr_token, declared_type, segment)
            self.__next_token()  # , or ;

        self.__next_token()  # after ;
        self.print_close('compile_class_var_dec_end')

    def compile_subroutine_dec(self):
        """Compile one constructor, function or method declaration.

        Starts a fresh subroutine scope in the symbol table. For a method,
        ``this`` is defined as the first argument before the declared
        parameters are added. The body is compiled by
        ``compile_subroutine_body``.
        """
        self.print_open('compile_subroutine_dec')
        self.symbol_table.start_subroutine()

        subroutine_kind = self.curr_token  # (constructor|function|method)

        self.__next_token()
        return_type = self.curr_token  # (void|type)

        self.__next_token()
        name = self.curr_token  # subroutineName

        self.__next_token()  # '('

        is_method = subroutine_kind == 'method'
        if is_method:
            self.symbol_table.define('this', self.class_name, 'argument')

        self.__next_token()
        self.compile_parameter_list()

        self.__next_token()  # after ')'

        self.compile_subroutine_body(subroutine_kind, return_type, name)
        self.print_close('compile_subroutine_dec_end')

    def compile_parameter_list(self):
        """Compile ``((type varName) (',' type varName)*)?`` up to the ``)``.

        Every parameter is defined in the symbol table as an ``'argument'``.
        The closing ``)`` is left as the current token.
        """
        self.print_open('compile_parameter_list')
        while self.curr_token != ')':
            if self.curr_token == ',':
                self.__next_token()  # skip the separator

            param_type = self.curr_token
            self.__next_token()
            param_name = self.curr_token

            self.symbol_table.define(param_name, param_type, 'argument')
            self.__next_token()
        self.print_close('compile_parameter_list_end')

    def compile_subroutine_body(self, kind, var_type, subroutine_name):
        """Compile ``'{' varDec* statements '}'`` and the function prologue.

        The ``function`` command is written only after the local
        declarations have been read, because it needs the number of locals.

        :param kind: ``'constructor'``, ``'function'`` or ``'method'``.
        :param var_type: declared return type (not used in this routine).
        :param subroutine_name: name without the class prefix.
        """
        self.print_open('compile_subroutine_body')
        self.__next_token()  # after '{'
        while self.curr_token == 'var':
            self.compile_var_dec()

        emit = self.vmwriter
        qualified_name = '.'.join((self.class_name, subroutine_name))
        n_locals = self.symbol_table.var_count('local')
        emit.write_function(qualified_name, n_locals)

        if kind == 'method':
            # Anchor `this` on the object passed as argument 0.
            emit.write_push('argument', 0)
            emit.write_pop('pointer', 0)
        elif kind == 'constructor':
            # Allocate one word per field and anchor `this` on the block.
            n_fields = self.symbol_table.var_count('this')
            emit.write_push('constant', n_fields)
            emit.write_call('Memory.alloc', 1)
            emit.write_pop('pointer', 0)

        self.compile_statements()
        self.__next_token()  # after '}'

        self.print_close('compile_subroutine_body_end')

    def compile_var_dec(self):
        """Compile ``'var' type varName (',' varName)* ';'``.

        Every declared name is defined in the symbol table as a ``'local'``
        of the declared type. On entry the current token is ``var``; on
        exit it is the token after the closing ``;``.
        """
        self.print_open('compile_var_dec')
        # curr token is var

        self.__next_token()
        var_type = self.curr_token  # type

        self.__next_token()
        var_name = self.curr_token  # varName

        self.symbol_table.define(var_name, var_type, 'local')

        self.__next_token()  # , or ;
        while self.curr_token != ';':
            self.__next_token()  # skip ',' and land on the next varName
            # Every name in the list shares the declared type; passing the
            # builtin `type` here would record a wrong type for all names
            # after the first.
            self.symbol_table.define(self.curr_token, var_type, 'local')
            self.__next_token()  # , or ;

        self.__next_token()  # after ;

        self.print_close('compile_var_dec_end')

    def compile_statements(self):
        """Compile statements until the current token starts none.

        Dispatches on the statement keyword under the cursor. The ``do``
        keyword is matched without trailing whitespace, like every other
        keyword, so ``do`` statements are compiled instead of ending the
        statement sequence early.
        """
        self.print_open('compile_statements')
        handlers = {
            'let': self.compile_let,
            'if': self.compile_if,
            'while': self.compile_while,
            'do': self.compile_do,
            'return': self.compile_return,
        }
        # Each handler leaves the cursor on the token after its statement.
        while self.curr_token in handlers:
            handlers[self.curr_token]()
        self.print_close('compile_statements_end')

    def compile_let(self):
        """Compile ``'let' varName ('[' expression ']')? '=' expression ';'``.

        For an array target, the element address (array + subscript) is
        computed first; after the right-hand side is evaluated, its value is
        parked in ``temp 0`` while ``pointer 1`` is aimed at the element,
        then stored through ``that 0``. For a plain variable, the value is
        popped straight into the variable.
        """
        self.print_open('compile_let')
        # curr_token is let
        self.__next_token()
        target = self.curr_token  # varName
        kind = self.symbol_table.kind_of(target)
        index = self.symbol_table.index_of(target)
        emit = self.vmwriter

        self.__next_token()
        is_array = self.curr_token == '['
        if is_array:
            emit.write_push(kind, index)  # array base
            self.__next_token()           # after [
            self.compile_expression()     # subscript
            self.__next_token()           # ] --> =
            emit.write_arithmetic('add')  # element address

        self.__next_token()  # after =
        self.compile_expression()
        self.__next_token()  # after ;

        if is_array:
            emit.write_pop('temp', 0)     # park the value
            emit.write_pop('pointer', 1)  # that = element address
            emit.write_push('temp', 0)
            emit.write_pop('that', 0)
        else:
            emit.write_pop(kind, index)
        self.print_close('compile_let_end')

    def compile_if(self):
        """Compile an if statement with an optional else clause.

        The negated condition jumps to the ``L1<n>`` label, which sits in
        front of the else part; the true branch ends with a jump to the
        ``L2<n>`` label at the very end of the statement.
        """
        self.print_open('compile_if')
        # curr_token is if

        suffix = str(self.__next_label_index())
        else_label = 'L1' + suffix
        end_label = 'L2' + suffix
        emit = self.vmwriter

        self.__next_token()  # (
        self.__next_token()  # after (
        self.compile_expression()
        emit.write_arithmetic('not')
        self.__next_token()  # ) --> {

        self.__next_token()  # { --> ?
        emit.write_if(else_label)
        self.compile_statements()
        emit.write_go_to(end_label)
        self.__next_token()  # } --> ?

        emit.write_label(else_label)

        if self.curr_token == 'else':
            self.__next_token()  # else --> {
            self.__next_token()  # { --> ?
            self.compile_statements()
            self.__next_token()  # } --> ?

        emit.write_label(end_label)
        self.print_close('compile_if_end')

    def compile_while(self):
        """
        Compile a ``while`` loop.

        ``L1<n>`` labels the top of the loop (condition re-evaluation) and
        ``L2<n>`` the exit point, where ``<n>`` is a fresh label index.
        """
        self.print_open('compile_while')
        # curr_token is while
        suffix = str(self.__next_label_index())
        loop_label = 'L1' + suffix
        exit_label = 'L2' + suffix

        self.vmwriter.write_label(loop_label)
        self.__next_token()  # while --> (
        self.__next_token()  # ( --> first token of the condition
        self.compile_expression()
        self.__next_token()  # ) --> {

        # Leave the loop as soon as the condition is false.
        self.vmwriter.write_arithmetic('not')
        self.vmwriter.write_if(exit_label)

        self.__next_token()  # { --> ?
        self.compile_statements()
        self.__next_token()  # } --> ?
        self.vmwriter.write_go_to(loop_label)

        self.vmwriter.write_label(exit_label)
        self.print_close('compile_while_end')

    def compile_do(self):
        """
        Compile a ``do`` statement.

        Expects the current token to be ``do``. The call itself is compiled
        by ``subroutine_call``; the value it leaves on the stack is then
        discarded into ``temp 0``.
        """
        self.print_open('compile do')
        # curr_token is do
        self.__next_token()  # do --> (subroutineName | className | varName)
        self.subroutine_call()
        self.vmwriter.write_pop('temp', 0)  # because of void call
        self.__next_token()  # ; --> ?
        self.print_close('compile do_end')

    def subroutine_call(self, skipped=False, arg_name=''):
        """
        Compile a subroutine call and emit the matching ``call`` command.

        Three forms are handled:

        * ``name(...)``        -- method of the current class; ``pointer 0``
          is pushed as the implicit first argument.
        * ``var.name(...)``    -- ``var`` is in the symbol table; the variable
          is pushed as the first argument and its declared type is used as
          the class name.
        * ``Class.name(...)``  -- ``Class`` is not in the symbol table; called
          with the explicit arguments only.

        :param skipped: True when the caller has already consumed the leading
            identifier (the current token is then ``(`` or ``.``).
        :param arg_name: the already-consumed identifier; only read when
            ``skipped`` is True.
        """
        self.print_open('subroutine_call')
        if skipped:
            name = arg_name
        else:
            name = self.curr_token  # (subroutineName | className | varName)
            self.__next_token()

        function = name
        args = 0
        if self.curr_token == '(':
            # Method call on the current object.
            function = self.class_name + '.' + name
            self.vmwriter.write_push('pointer', 0)
            args = 1
        elif self.curr_token == '.':
            self.__next_token()  # . --> subroutine_name
            subroutine_name = self.curr_token

            kind = self.symbol_table.kind_of(name)
            if kind is None:
                # Not a known variable, so 'name' is treated as a class name.
                function = name + '.' + subroutine_name
            else:
                # Known variable: push it as the receiver.
                var_type = self.symbol_table.type_of(name)
                function = var_type + '.' + subroutine_name
                self.vmwriter.write_push(kind,
                                         self.symbol_table.index_of(name))
                args = 1
            self.__next_token()  # subroutine_name --> (

        self.__next_token()  # ( --> ?
        expression_list_len = self.compile_expression_list()
        self.__next_token()  # ) --> ;

        self.vmwriter.write_call(function, args + expression_list_len)
        self.print_close('subroutine_call_end')

    def compile_return(self):
        """
        Compile a ``return`` statement.

        A bare ``return;`` pushes the constant 0 before the ``return``
        command is written; otherwise the returned expression is compiled.
        """
        self.print_open('compile_return')
        # curr_token is return
        self.__next_token()  # return --> ?

        if self.curr_token == ';':
            # Nothing to return: push a dummy value.
            self.vmwriter.write_push('constant', 0)
        else:
            self.compile_expression()

        self.__next_token()  # ; --> ?
        self.vmwriter.write_return()
        self.print_close('compile_return_end')

    def compile_expression(self):
        """
        Compile ``term (op term)*``.

        Operators are emitted after their right-hand term, in the order they
        appear. ``*`` and ``/`` become calls with two arguments; the other
        operators are written as arithmetic commands when ``translate_dict``
        has an entry for them.
        """
        self.print_open('compile_expression')
        self.compile_term()

        while self.curr_token in {'+', '-', '*', '/', '&', '|', '<', '>', '='}:
            operator = self.curr_token
            self.__next_token()  # op --> first token of the next term
            self.compile_term()

            if operator in ['*', '/']:
                routine = self.translate_dict[operator]
                self.vmwriter.write_call(routine, 2)
            elif operator in self.translate_dict:
                command = self.translate_dict[operator]
                self.vmwriter.write_arithmetic(command)

        self.print_close('compile_expression_end')

    def compile_term(self):
        """
        Compile a single term of an expression.

        Handles, in this order: a parenthesised expression, a unary ``-`` or
        ``~`` applied to a term, a string constant, an integer constant, a
        keyword constant, and finally an identifier (array element,
        subroutine call or plain variable).
        """
        self.print_open('compile_term')
        if self.curr_token == '(':
            self.__next_token()  # ( --> ?
            self.compile_expression()
            self.__next_token()  # ) --> ?
        elif self.curr_token in {'-', '~'}:
            unary_op = self.curr_token  # (-|~)
            self.__next_token()  # (-|~) --> ?
            self.compile_term()
            self.vmwriter.write_arithmetic(
                self.translate_dict['unary' + unary_op])
        elif self.curr_token_type == 'stringConstant':
            # Build the string at run time, one character at a time.
            text = self.curr_token
            self.vmwriter.write_push('constant', len(text))
            self.vmwriter.write_call('String.new', 1)
            for char in text:
                self.vmwriter.write_push('constant', ord(char))
                self.vmwriter.write_call('String.appendChar', 2)
            self.__next_token()
        elif self.curr_token_type == 'integerConstant':
            self.vmwriter.write_push('constant', self.curr_token)
            self.__next_token()
        elif self.curr_token_type == 'keyword':
            if self.curr_token == 'this':
                self.vmwriter.write_push('pointer', 0)
            else:
                # Every other keyword starts from constant 0;
                # 'true' is then turned into not(0).
                self.vmwriter.write_push('constant', 0)
                if self.curr_token == 'true':
                    self.vmwriter.write_arithmetic('not')
            self.__next_token()
        else:
            # An identifier: one token of look-ahead decides what it is.
            identifier = self.curr_token
            self.__next_token()
            if self.curr_token == '[':
                # Array element: base + subscript, then read through 'that'.
                self.vmwriter.write_push(
                    self.symbol_table.kind_of(identifier),
                    self.symbol_table.index_of(identifier))

                self.__next_token()  # [ --> ?
                self.compile_expression()
                self.__next_token()  # ] --> ?

                self.vmwriter.write_arithmetic('add')
                self.vmwriter.write_pop('pointer', 1)
                self.vmwriter.write_push('that', 0)
            elif self.curr_token in ['(', '.']:
                # The identifier was already consumed, so hand it over.
                self.subroutine_call(True, identifier)
            else:
                # Plain variable.
                self.vmwriter.write_push(
                    self.symbol_table.kind_of(identifier),
                    self.symbol_table.index_of(identifier))

        self.print_close('compile_term_end')

    def compile_expression_list(self):
        """
        Compile a (possibly empty) comma-separated list of expressions.

        Stops when the current token is ``)`` and does not consume it.

        :return: the number of expressions that were compiled
        """
        self.print_open('compile_expression_list')
        compiled = 0

        while self.curr_token != ')':
            # A comma separates this expression from the previous one.
            if self.curr_token == ',':
                self.__next_token()
            self.compile_expression()
            compiled += 1

        self.print_close('compile_expression_list_end')
        return compiled

    #-----------private methods----------------
    def __next_token(self):
        """
        Move to the next token and refresh ``curr_token`` / ``curr_token_type``.

        When debugging is on, the token being left is printed first. The
        tokenizer is advanced only if it reports more tokens; the cached
        token and its type are re-read either way.
        """
        if self.DEBUG:
            indent = '  ' * self.depth
            print(indent + 'curr_token: ' + self.curr_token)

        tokenizer = self.tokenizer
        if tokenizer.has_more_tokens():
            tokenizer.advance()
        self.curr_token_type = tokenizer.token_type()
        self.curr_token = tokenizer.keyword()

    def __next_label_index(self):
        """Return the current label index, then bump the counter by one."""
        current = self.label_index
        self.label_index = current + 1
        return current

    def print_open(self, string):
        """
        Debug trace: print ``string`` at the current depth, then go one
        level deeper. Does nothing when ``DEBUG`` is off.
        """
        if not self.DEBUG:
            return
        indent = '  ' * self.depth
        print(indent + string)
        self.depth += 1

    def print_close(self, string):
        """
        Debug trace: go one level up, then print ``string`` at that depth.
        Does nothing when ``DEBUG`` is off.
        """
        if not self.DEBUG:
            return
        self.depth -= 1
        indent = '  ' * self.depth
        print(indent + string)
Esempio n. 8
0
class CompliationEngine(object):
    """
    Effects the actual compilation output. Gets its input from a
    JackTokenizer and emits VM commands through a VMWriter.

    Most ``Compile*`` methods take the token they start on and return the
    first token they did not consume, so that callers can chain them.
    """
    MAP = {'<': "&lt;", '>': "&gt;", '"': "&quot;", '&': "&amp;"}

    def __init__(self, tokenizer, out_file_name):
        """
        Constructor

        :param tokenizer: token source; ``next_token()`` and
            ``expected_token()`` are the methods used by this class
        :param out_file_name: passed to ``VMWriter`` as the output target
        """
        self._tokenizer = tokenizer
        self._vm_writer = VMWriter(out_file_name)
        self._class_name = None
        self._symbol_table = SymbolTable()
        self._counter = 0  # running number used to build unique labels
        self._subroutine_name = None

    def _next_token(self):
        """Advance the tokenizer and return the new token as a string."""
        return str(self._tokenizer.next_token())

    def Compile(self):
        """Compile the input if (and only if) it starts with ``class``."""
        token = self._next_token()
        if token == 'class':
            self.CompileClass(token)

    def CompileClass(self, token):
        """
        takes 'class' as token
        and end the compilation
        """
        self._class_name = self._tokenizer.next_token()  # got the class name
        self._next_token()  # '{'
        token = self._tokenizer.next_token()  # first class-level declaration

        # Class level variables
        while token in ['field', 'static']:
            token = self.CompileClassVarDec(token)

        # Class subroutines
        while token in ['function', 'method', 'constructor']:
            token = self.CompileSubroutine(token)

        self._vm_writer.writer_close()
        self._symbol_table.printSymbolTables()

    def CompileSubroutine(self, token):
        """
        Takes any among 'function', 'method', 'constructor'
        and returns the token that follows the subroutine's closing '}'
        (the next subroutine keyword or the end of the class).
        """
        function_modifier = token

        self._next_token()  # return type
        function_name = self._next_token()  # name of function

        self._subroutine_name = function_name

        self._symbol_table.startSubRoutine(function_name)
        if function_modifier == 'method':
            # 'this' is the implicit first argument of every method.
            self._symbol_table.define(['this', self._class_name, 'argument'])

        self._next_token()  # '('

        token = self._next_token()  # first parameter type or ')'
        while token != ')':
            token = self.CompileParamList(token)

        self._next_token()  # '{'
        token = self._next_token()  # 'var', a statement or '}'

        while token == 'var':
            token = self.CompileVarDec(token)

        local_variables = self._symbol_table.varCount('local')

        # Writing Function VM
        self._vm_writer.write_subroutine(self._class_name, function_name,
                                         local_variables)
        if function_modifier == 'constructor':
            # Keyed on the 'constructor' keyword rather than on the name
            # 'new': Jack does not force constructors to be called 'new'.
            no_of_fields = self._symbol_table.varCount('field')
            self._vm_writer.write_push('constant', no_of_fields)
            self._vm_writer.write_call('Memory', 'alloc', 1)
            self._vm_writer.write_pop('pointer', 0)
        elif function_modifier == 'method':
            # Anchor 'this' to the object passed as argument 0.
            self._vm_writer.write_push('argument', 0)
            self._vm_writer.write_pop('pointer', 0)

        while token != '}':
            token = self.CompileStatements(token)

        token = self._next_token()  # next subroutine
        return token

    def CompileStatements(self, token):
        """
        Compile the single statement that starts with ``token`` and return
        the token following it.

        :raises ValueError: if ``token`` does not start a statement. The
            callers loop until they see '}', so returning None here would
            make them spin forever.
        """
        if token == 'return':
            return self.CompileReturn(token)
        if token == 'do':
            return self.CompileDo(token)
        if token == 'let':
            return self.CompileLet(token)
        if token == 'while':
            return self.CompileWhile(token)
        if token == 'if':
            return self.CompileIf(token)
        raise ValueError(
            'unexpected token %r where a statement was expected' % (token,))

    def CompileIf(self, token):
        """
        Takes 'if' keyword and returns next statement token
        """
        self._counter += 1  # for linear label names
        self._next_token()  # '('

        token = self._next_token()
        token = self.CompileExpression(token)  # returns ')'

        # Negate the condition so one if-goto skips the 'then' body.
        self._vm_writer.write_arithmatic('~')
        label = self._class_name + '.' + 'if.' + str(self._counter) + '.L1'
        self._vm_writer.write_if_goto(label)

        self._next_token()  # '{'
        token = self._next_token()

        # Built before the body is compiled: nested statements bump the
        # counter.
        goto_label = self._class_name + '.' + 'if.' + str(
            self._counter) + '.L2'

        while token != '}':
            token = self.CompileStatements(token)

        self._vm_writer.write_goto(goto_label)
        self._vm_writer.write_label(label)

        # optional else Command
        token = self._next_token()
        if token == "else":
            token = self.CompileElse(token)

        self._vm_writer.write_label(goto_label)

        return token

    def CompileElse(self, token):
        """
        Takes 'else' token and return next statement token
        """
        self._next_token()  # '{'

        token = self._next_token()
        while token != '}':
            token = self.CompileStatements(token)

        token = self._next_token()
        return token

    def CompileWhile(self, token):
        """
        Takes 'while' token and returns next statement token
        """
        self._counter += 1  # for linear label names

        label = self._class_name + '.' + 'while.' + str(self._counter) + '.L1'
        self._vm_writer.write_label(label)

        self._next_token()  # '('

        token = self._next_token()
        token = self.CompileExpression(token)  # returns ')'

        self._vm_writer.write_arithmatic('~')  # ~cond

        if_label = self._class_name + '.' + 'while.' + str(
            self._counter) + '.L2'
        self._vm_writer.write_if_goto(if_label)

        self._next_token()  # '{'

        token = self._next_token()
        while token != '}':
            token = self.CompileStatements(token)

        self._vm_writer.write_goto(label)  # back to the condition
        self._vm_writer.write_label(if_label)  # label for next statement

        token = self._next_token()
        return token

    def CompileDo(self, token):
        """
        Takes 'do' token, compiles the call and discards its result.
        Returns the token following the terminating ';'.
        """
        identifier = self._next_token()  # identifier or class name

        token = self._next_token()
        class_name = identifier
        no_of_arguments = 0
        if token == ".":
            method_or_function = self._next_token()
            self._next_token()  # '('
            id_type = self._symbol_table.typeOf(identifier)
        else:
            # name(...): method of the current object, 'this' goes first.
            class_name = self._class_name
            method_or_function = identifier
            no_of_arguments += 1
            self._vm_writer.write_push('pointer', '0')
            id_type = None

        token = self._next_token()

        if id_type is not None:
            # var.name(...): push the variable as the receiver and call the
            # subroutine on its declared type.
            segment = self._symbol_table.kindOf(identifier)
            index = self._symbol_table.indexOf(identifier)
            self._vm_writer.write_push(segment, index)
            no_of_arguments += 1
            class_name = id_type

        no_arguments = 0
        if token != ')':
            token, no_arguments = self.CompilerExpressionList(
                token)  # return value is ')'

        no_of_arguments += no_arguments

        self._vm_writer.write_call(class_name, method_or_function,
                                   no_of_arguments)
        self._next_token()  # ';'

        # 'void functions will return constant 0 which should be discarded'
        self._vm_writer.write_pop('temp', '0')
        token = self._next_token()
        return token

    def CompileLet(self, token):
        """
        Takes 'let' token and returns the token following the
        terminating ';'.
        """
        identifier = self._next_token()  # left hand side identifier
        segment = self._symbol_table.kindOf(identifier)
        index = str(self._symbol_table.indexOf(identifier))

        token = self._next_token()  # = or [
        if_array = False
        if token == '[':
            if_array = True
            token = self._next_token()
            token = self.CompileExpression(token)  # ']'
            self._vm_writer.write_push(segment, index)
            self._vm_writer.write_arithmatic('+')

            # Equal Expression
            token = self._next_token()

        # Right Hand Side Expression
        token = self._next_token()
        token = self.CompileExpression(token)

        # End Statements
        if if_array:
            # Value to temp 0, target address to pointer 1, then store.
            self._vm_writer.write_pop('temp', 0)
            self._vm_writer.write_pop('pointer', 1)
            self._vm_writer.write_push('temp', 0)
            self._vm_writer.write_pop('that', 0)
        else:
            self._vm_writer.write_pop(segment, index)

        token = self._next_token()
        return token

    def CompileReturn(self, token):
        """
        Takes 'return' token
        if simple return pushes dummy constant 0 before returning.
        Returns the token following the terminating ';'.
        """
        token = self._next_token()  # ';'?
        if token == ';':
            self._vm_writer.write_push('constant', '0')
        else:
            token = self.CompileExpression(token)  # ';'

        self._vm_writer.write_return()
        return self._next_token()

    def CompilerExpressionList(self, token):
        """
        Compile a non-empty, comma-separated list of expressions starting
        at ``token``.

        :return: ``(token, count)`` -- the token that ended the list and the
            number of expressions compiled
        """
        no_of_argument = 1
        token = self.CompileExpression(token)  # returns ',' or the list end

        while token == ",":
            no_of_argument += 1
            token = self._next_token()
            token = self.CompileExpression(token)
        return token, no_of_argument

    def CompileExpression(self, token):
        """
        Compile ``term (op term)*`` and return the token following the
        expression.

        Each operator is written right after its right-hand term, so chained
        operators are applied strictly left to right.
        """
        token = self.CompileTerm(token)

        # Loop rather than a single check, so 'a + b + c' is consumed whole.
        while token in Lexical.OP:
            operator = token
            token = self._next_token()  # Next term
            token = self.CompileTerm(token)
            self._vm_writer.write_arithmatic(operator)
        return token

    def CompileTerm(self, token):
        """
        Takes the term token and returns the token after the term
        """
        if token.isdigit():
            self._vm_writer.write_push('constant', token)
        elif token.startswith('"'):
            no_of_character = len(token) - 2  # removing "
            self._vm_writer.write_push('constant', no_of_character)
            self._vm_writer.write_call('String', 'new', 1)
            for char in token[1:-1]:
                self._vm_writer.write_push('constant', ord(char))
                self._vm_writer.write_call('String', 'appendChar', 2)
        elif token == 'true':
            # true is represented as -1
            self._vm_writer.write_push('constant', '1')
            self._vm_writer.write_arithmatic('-', 'NEG')
        elif token in ['false', 'null']:
            self._vm_writer.write_push('constant', '0')
        elif token == 'this':
            self._vm_writer.write_push('pointer', '0')
        elif token == '-':
            return self.CompileNegOperator(token)
        elif token == "~":
            return self.CompileNotOperator(token)
        elif token == "(":
            token = self._next_token()  # Term token
            token = self.CompileExpression(token)  # Returns ')'
        elif self._tokenizer.expected_token() == "[":
            # Array element: base + subscript, then read through 'that 0'.
            identifier = token
            index = self._symbol_table.indexOf(identifier)
            segment = self._symbol_table.kindOf(identifier)
            self._vm_writer.write_push(segment, index)

            self._next_token()  # '['

            token = self._next_token()
            token = self.CompileExpression(token)  # return value is ']'

            self._vm_writer.write_arithmatic('+')
            self._vm_writer.write_pop('pointer', '1')
            self._vm_writer.write_push('that', '0')

        elif self._tokenizer.expected_token() == ".":
            # Qualified call: var.name(...) or Class.name(...)
            identifier = token
            self._next_token()  # '.'
            method_or_function = self._next_token()

            self._next_token()  # '('

            token = self._next_token()
            no_of_arguments = 0

            class_name = identifier
            id_type = self._symbol_table.typeOf(identifier)
            if id_type is not None:
                segment = self._symbol_table.kindOf(identifier)
                index = self._symbol_table.indexOf(identifier)
                self._vm_writer.write_push(segment, index)
                no_of_arguments += 1
                class_name = id_type

            no_arguments = 0
            if token != ")":
                token, no_arguments = self.CompilerExpressionList(token)

            no_of_arguments += no_arguments
            self._vm_writer.write_call(class_name, method_or_function,
                                       no_of_arguments)
        else:
            # Plain variable.
            # NOTE(review): an unqualified call 'name(args)' inside an
            # expression is not recognised above and also ends up here.
            identifier = token
            index = self._symbol_table.indexOf(identifier)
            segment = self._symbol_table.kindOf(identifier)
            self._vm_writer.write_push(segment, index)

        token = self._next_token()
        return token

    def CompileNegOperator(self, token):
        """
        Takes '-' in term position, compiles the operand term and negates
        it. Returns the token following the operand.
        """
        token = self._next_token()
        token = self.CompileTerm(token)
        self._vm_writer.write_arithmatic('-', 'NEG')
        return token

    def CompileNotOperator(self, token):
        """
        Takes '~', compiles the operand term and inverts it. Returns the
        token following the operand.
        """
        # CompileTerm already handles a parenthesised operand and returns
        # the token after its ')', so no special case is needed for '('.
        token = self.CompileTerm(self._next_token())
        self._vm_writer.write_arithmatic('~')
        return token

    def CompileParamList(self, token):
        """
        Takes the type of the first remaining parameter of the subroutine,
        defines it and every following parameter as 'argument', and returns
        the token that ends the list.
        """
        id_type = token  # type of the parameter
        kind = 'argument'
        identifier = self._next_token()  # identifier name
        identifier_details = [identifier, id_type, kind]
        self._symbol_table.define(identifier_details)

        token = self._next_token()
        if token == ',':
            token = self._next_token()
            return self.CompileParamList(token)
        return token

    def CompileVarDec(self, token):
        """
        Takes 'var' as token, defines every declared name as 'local' and
        returns the token following the terminating ';'
        (either 'var' or do, let, if, while, ...).
        """
        id_type = self._next_token()  # type of var variable
        kind = 'local'
        identifier = self._next_token()  # identifier name
        self._symbol_table.define([identifier, id_type, kind])
        token = self._next_token()  # ',' or ';'

        while token == ',':
            identifier = self._next_token()  # identifier name
            self._symbol_table.define([identifier, id_type, kind])
            token = self._next_token()  # ',' or ';'

        return self._next_token()

    def CompileClassVarDec(self, token):
        """
        Takes 'field' or 'static', defines every name of the declaration and
        of any directly following class-variable declarations, and returns
        the first token after them.
        """
        class_var_modifer = str(token)  # 'field' or 'static'

        # primitive or user defined class
        class_var_type = self._next_token()
        identifier = self._next_token()

        identifier_details = [identifier, class_var_type, class_var_modifer]
        self._symbol_table.define(identifier_details)

        token = self._tokenizer.next_token()

        while token == ',':
            identifier = self._next_token()
            identifier_details = [
                identifier, class_var_type, class_var_modifer
            ]
            self._symbol_table.define(identifier_details)
            token = self._next_token()

        token = self._tokenizer.next_token()

        if token in ['field', 'static']:
            return self.CompileClassVarDec(token)

        return token
Esempio n. 9
0
class CompliationEngine(object):
    """
    Effects the actual compilation output. Gets its input from a
    JackTokenizer and emits its parsed structure into an output file/stream
    """

    MAP = {"<": "&lt;", ">": "&gt;", '"': "&quot;", "&": "&amp;"}

    def __init__(self, tokenizer, out_file_name):
        """
        Set up the engine's collaborators and per-class state.

        :param tokenizer: token source for the class being compiled
        :param out_file_name: handed to ``VMWriter`` as the output target
        """
        # Collaborators
        self._tokenizer = tokenizer
        self._vm_writer = VMWriter(out_file_name)
        self._symbol_table = SymbolTable()
        # State filled in while compiling
        self._class_name = self._subroutine_name = None
        self._counter = 0  # running number used to build unique labels

    def Compile(self):
        token = str(self._tokenizer.next_token())
        if token == "class":
            self.CompileClass(token)

    def CompileClass(self, token):
        """
        takes 'class' as token
        and end the compilation
        """
        self._class_name = self._tokenizer.next_token()  # got the class name
        str(self._tokenizer.next_token())  # '{'
        token = self._tokenizer.next_token()  # field declarations

        # For declaring Class Level Variable

        while token in ["field", "static"]:
            token = self.CompileClassVarDec(token)

        # Class Methods
        while token in ["function", "method", "constructor"]:
            token = self.CompileSubroutine(token)

        self._vm_writer.writer_close()
        self._symbol_table.printSymbolTables()

    def CompileSubroutine(self, token):
        """
        Takes any among 'function', 'method', 'constructor'
        and return token after end of subroutine '}' 
        or simple next subroutine token
        """
        function_modifier = token

        str(self._tokenizer.next_token())  # return type
        function_name = str(self._tokenizer.next_token())  # name of function

        self._subroutine_name = function_name

        self._symbol_table.startSubRoutine(function_name)
        if function_modifier == "method":
            self._symbol_table.define(["this", self._class_name, "argument"])

        str(self._tokenizer.next_token())  # '('

        token = str(self._tokenizer.next_token())  # 'arguments'

        while token != ")":
            token = self.CompileParamList(token)

        str(self._tokenizer.next_token())  # '{'
        token = str(self._tokenizer.next_token())  # Statements or '}'

        while token == "var":
            token = self.CompileVarDec(token)

        local_variables = self._symbol_table.varCount("local")

        # Writing Function VM
        self._vm_writer.write_subroutine(self._class_name, function_name, local_variables)
        if function_name == "new":
            no_of_fields = self._symbol_table.varCount("field")
            self._vm_writer.write_push("constant", no_of_fields)
            self._vm_writer.write_call("Memory", "alloc", 1)
            self._vm_writer.write_pop("pointer", 0)
        if function_modifier == "method":
            self._vm_writer.write_push("argument", 0)
            self._vm_writer.write_pop("pointer", 0)
        """temp_buffer = ""
        while local_variables > 0:
            temp_buffer += 'push constant 0\n'
            local_variables -= 1
        
        self._out_file_object.write(temp_buffer)
        self._out_file_object.flush()"""

        while token != "}":
            token = self.CompileStatements(token)

        token = str(self._tokenizer.next_token())  # next subroutine
        return token

    def CompileStatements(self, token):
        if token == "return":
            return self.CompileReturn(token)
        if token == "do":
            return self.CompileDo(token)
        if token == "let":
            return self.CompileLet(token)
        if token == "while":
            return self.CompileWhile(token)
        if token == "if":
            return self.CompileIf(token)

    def CompileIf(self, token):
        """
        Takes 'if' keyword and returns next statement token
        """
        self._counter += 1  # for linear label names
        str(self._tokenizer.next_token())  # '('

        token = str(self._tokenizer.next_token())
        token = self.CompileExpression(token)  # returns ')'

        self._vm_writer.write_arithmatic("~")
        label = self._class_name + "." + "if." + str(self._counter) + ".L1"
        self._vm_writer.write_if_goto(label)

        str(self._tokenizer.next_token())  # '}'
        token = str(self._tokenizer.next_token())

        goto_label = self._class_name + "." + "if." + str(self._counter) + ".L2"

        while token != "}":
            token = self.CompileStatements(token)

        self._vm_writer.write_goto(goto_label)
        self._vm_writer.write_label(label)

        # optional else Command
        token = str(self._tokenizer.next_token())
        if token == "else":
            token = self.CompileElse(token)

        self._vm_writer.write_label(goto_label)

        return token

    def CompileElse(self, token):
        """
        Takes 'else' token and return next statement token
        """

        str(self._tokenizer.next_token())  # '{'

        token = str(self._tokenizer.next_token())
        while token != "}":
            token = self.CompileStatements(token)

        token = str(self._tokenizer.next_token())
        return token

    def CompileWhile(self, token):
        """
        Takes 'while' token and returns next statement token
        """
        self._counter += 1  # for linear label names

        label = self._class_name + "." + "while." + str(self._counter) + ".L1"
        self._vm_writer.write_label(label)

        str(self._tokenizer.next_token())  # '('

        token = str(self._tokenizer.next_token())
        token = self.CompileExpression(token)  # 'returns ')'

        self._vm_writer.write_arithmatic("~")  # ~cond

        if_label = self._class_name + "." + "while." + str(self._counter) + ".L2"
        self._vm_writer.write_if_goto(if_label)

        str(self._tokenizer.next_token())  # '{'

        token = str(self._tokenizer.next_token())
        while token != "}":
            token = self.CompileStatements(token)

        self._vm_writer.write_goto(label)  # 'goto label'
        self._vm_writer.write_label(if_label)  # label for next statement

        token = str(self._tokenizer.next_token())
        return token

    def CompileDo(self, token):
        """
        Compile a 'do' statement, i.e. a subroutine call whose result is dropped.

        The incoming token is not inspected.  The call target is read from the
        tokenizer, the receiver (if any) and the arguments are pushed, the call
        is written, and the returned value is popped into temp 0.
        Returns the token that follows the closing ';'.
        """
        reader = self._tokenizer
        writer = self._vm_writer

        target = str(reader.next_token())  # subroutine, variable or class name
        separator = str(reader.next_token())  # '.' or '('

        if separator == ".":
            # qualified form: target.subroutine(...)
            owner = target
            subroutine = str(reader.next_token())
            reader.next_token()  # '('
            receiver_type = self._symbol_table.typeOf(target)
            pushed = 0
        else:
            # unqualified form: subroutine(...) on the current class,
            # with pointer 0 pushed as the first argument
            owner = self._class_name
            subroutine = target
            pushed = 1
            writer.write_push("pointer", "0")
            receiver_type = None

        token = str(reader.next_token())  # first argument token or ')'

        if receiver_type is not None:
            # target is a declared variable: push it as the first argument and
            # call the subroutine on its declared type
            table = self._symbol_table
            kind = table.kindOf(target)
            writer.write_push(kind, table.indexOf(target))
            pushed += 1
            owner = receiver_type

        listed = 0
        if token != ")":
            # the expression list hands back the closing ')'
            token, listed = self.CompilerExpressionList(token)

        writer.write_call(owner, subroutine, pushed + listed)
        reader.next_token()  # ';'

        # the value returned by the call is not used by a 'do' statement
        writer.write_pop("temp", "0")
        return str(reader.next_token())

    def CompileLet(self, token):
        """
        Compile a 'let' statement.

        Receives the 'let' token (not inspected), consumes the statement up to
        and including its terminating ';' and returns the token after it.
        Handles both ``let name = expr;`` and ``let name[expr] = expr;``.
        """
        reader = self._tokenizer
        writer = self._vm_writer

        target = str(reader.next_token())  # variable on the left-hand side
        segment = self._symbol_table.kindOf(target)
        index = str(self._symbol_table.indexOf(target))

        token = str(reader.next_token())  # '=' or '['
        is_element = token == "["
        if is_element:
            # index expression first, then the base, then their sum
            token = self.CompileExpression(str(reader.next_token()))  # ']'
            writer.write_push(segment, index)
            writer.write_arithmatic("+")
            reader.next_token()  # '='

        # right-hand side; the expression compiler hands back the ';'
        token = self.CompileExpression(str(reader.next_token()))

        if not is_element:
            writer.write_pop(segment, index)
        else:
            # park the value in temp 0 while the target address is moved
            # into pointer 1, then store the value through 'that 0'
            writer.write_pop("temp", 0)
            writer.write_pop("pointer", 1)
            writer.write_push("temp", 0)
            writer.write_pop("that", 0)

        return str(reader.next_token())

    def CompileReturn(self, token):
        """
        Compile a 'return' statement.

        Receives the 'return' token (not inspected).  A bare ``return;``
        pushes constant 0 as the returned value; otherwise the returned
        expression is compiled.  Returns the token after the ';'.
        """
        reader = self._tokenizer
        first = str(reader.next_token())  # ';' or the start of an expression

        if first != ";":
            # the expression compiler stops on the ';'
            self.CompileExpression(first)
        else:
            self._vm_writer.write_push("constant", "0")

        self._vm_writer.write_return()
        return str(reader.next_token())

    def CompilerExpressionList(self, token):
        """
        Compile a non-empty, comma-separated list of expressions.

        ``token`` is the first token of the first expression.  Returns a pair
        ``(token, count)``: the token on which the last expression stopped and
        the number of expressions compiled.
        """
        count = 0
        while True:
            count += 1
            token = self.CompileExpression(token)
            if token != ",":
                return token, count
            # step over the ',' onto the next expression
            token = str(self._tokenizer.next_token())

    def CompileExpression(self, token):
        """
        Compile an expression of the form ``term (op term)*``.

        ``token`` is the first token of the expression.  Terms are combined
        strictly left to right, each operator being written right after its
        right-hand term.  Returns the first token that is not part of the
        expression.

        Every operator in the chain is consumed here.  Handling only a single
        ``op term`` pair would hand the second operator of ``a + b + c`` back
        to the caller as if the expression had ended.
        """
        token = self.CompileTerm(token)

        while token in Lexical.OP:
            operator = token
            # compile the right-hand term, then apply the operator to the
            # two values produced so far
            token = self.CompileTerm(str(self._tokenizer.next_token()))
            self._vm_writer.write_arithmatic(operator)
        return token

    def CompileTerm(self, token):
        """
        Compile a single term and return the token that follows it.

        ``token`` is the first token of the term.  Recognised forms, in the
        order they are tested: integer constant, string constant (a token
        starting with a double quote), ``true``, ``false``/``null``, ``this``,
        unary ``-`` and ``~``, a parenthesised expression, an array element
        ``name[expr]``, a qualified call ``name.sub(args)`` and, as the
        fallback, a plain variable looked up in the symbol table.
        """
        reader = self._tokenizer
        writer = self._vm_writer

        if token.isdigit():
            writer.write_push("constant", token)
        elif token[0] == '"':
            # the token still carries its surrounding quotes
            text = token[1:-1]
            writer.write_push("constant", len(token) - 2)
            writer.write_call("String", "new", 1)
            for char in text:
                writer.write_push("constant", ord(char))
                writer.write_call("String", "appendChar", 2)
        elif token == "true":
            # true is produced as the negation of constant 1
            writer.write_push("constant", "1")
            writer.write_arithmatic("-", "NEG")
        elif token in ["false", "null"]:
            writer.write_push("constant", "0")
        elif token == "this":
            writer.write_push("pointer", "0")
        elif token == "-":
            # the unary helpers already return the token after the operand
            return self.CompileNegOperator(token)
        elif token == "~":
            return self.CompileNotOperator(token)
        elif token == "(":
            token = str(reader.next_token())  # first token inside the parens
            token = self.CompileExpression(token)  # stops on ')'
        elif reader.expected_token() == "[":
            # array element: push the base, add the index, read through 'that'
            identifier = token
            index = self._symbol_table.indexOf(identifier)
            segment = self._symbol_table.kindOf(identifier)
            writer.write_push(segment, index)

            reader.next_token()  # '['

            token = str(reader.next_token())
            token = self.CompileExpression(token)  # stops on ']'

            writer.write_arithmatic("+")
            writer.write_pop("pointer", "1")
            writer.write_push("that", "0")
        elif reader.expected_token() == ".":
            # qualified call: identifier.subroutine(args)
            identifier = token
            reader.next_token()  # '.'
            method_or_function = str(reader.next_token())

            reader.next_token()  # '('

            token = str(reader.next_token())  # first argument token or ')'
            no_of_arguments = 0

            class_name = identifier
            id_type = self._symbol_table.typeOf(identifier)
            if id_type is not None:
                # identifier is a declared variable: push it as the first
                # argument and call the subroutine on its declared type
                segment = self._symbol_table.kindOf(identifier)
                index = self._symbol_table.indexOf(identifier)
                writer.write_push(segment, index)
                no_of_arguments += 1
                class_name = id_type

            no_arguments = 0
            if token != ")":
                token, no_arguments = self.CompilerExpressionList(token)

            no_of_arguments += no_arguments
            writer.write_call(class_name, method_or_function, no_of_arguments)
        else:
            # plain variable reference
            identifier = token
            index = self._symbol_table.indexOf(identifier)
            segment = self._symbol_table.kindOf(identifier)
            writer.write_push(segment, index)

        # step past the last token of the term (constant, name, ')' or ']')
        return str(reader.next_token())

    def CompileNegOperator(self, token):
        """
        Compile a unary minus applied to the term that follows it.

        The incoming '-' token is not inspected.  Returns the token that
        follows the operand term.
        """
        operand_start = str(self._tokenizer.next_token())
        following = self.CompileTerm(operand_start)
        # the negation is written after its operand has been compiled
        self._vm_writer.write_arithmatic("-", "NEG")
        return following

    def CompileNotOperator(self, token):
        """
        Compile a '~' applied to the term that follows it.

        The incoming '~' token is not inspected.  The operand is either a
        parenthesised expression or a single term.  Returns the token that
        follows the operand.
        """
        reader = self._tokenizer
        operand_start = str(reader.next_token())

        if operand_start == "(":
            inner_start = str(reader.next_token())
            self.CompileExpression(inner_start)  # stops on the closing ')'
            following = str(reader.next_token())  # token after that ')'
        else:
            following = self.CompileTerm(operand_start)

        self._vm_writer.write_arithmatic("~")
        return following

    def CompileParamList(self, token):
        """
        Register the parameters of a subroutine in the symbol table.

        ``token`` is the type of the first parameter.  Each ``type name`` pair
        is defined with kind "argument"; pairs are separated by ','.
        Returns the first token after the last parameter name that is not
        a ','.
        """
        reader = self._tokenizer
        param_type = token
        while True:
            param_name = str(reader.next_token())
            self._symbol_table.define([param_name, param_type, "argument"])

            token = str(reader.next_token())
            if token != ",":
                return token
            # another parameter follows: its type comes first
            param_type = str(reader.next_token())

    def CompileVarDec(self, token):
        """
        Register the variables of one declaration with kind "local".

        The incoming token is not inspected.  Reads ``type name (',' name)*``
        followed by one more token (the terminator), defines every name in
        the symbol table and returns the token after the terminator.
        """
        reader = self._tokenizer
        table = self._symbol_table

        var_type = str(reader.next_token())
        table.define([str(reader.next_token()), var_type, "local"])
        separator = str(reader.next_token())  # ',' or ';'

        while separator == ",":
            table.define([str(reader.next_token()), var_type, "local"])
            separator = str(reader.next_token())  # ',' or ';'

        return str(reader.next_token())

    def CompileClassVarDec(self, token):
        """
        Register consecutive class-level variable declarations.

        ``token`` is the modifier of the first declaration ('field' or
        'static').  Each declaration has the shape
        ``modifier type name (',' name)* ';'``; every name is defined in the
        symbol table with the modifier as its kind.  Declarations are
        processed for as long as the token after a ';' is again 'field' or
        'static'.

        Returns, as a string, the first token after the last declaration.

        Every token read here is converted with ``str()``, as in the rest of
        this class, so the comparisons with "," and with the modifiers also
        work when the tokenizer hands out token objects rather than strings.
        """
        reader = self._tokenizer
        table = self._symbol_table
        modifier = str(token)

        while True:
            # primitive or user-defined class
            var_type = str(reader.next_token())
            table.define([str(reader.next_token()), var_type, modifier])

            separator = str(reader.next_token())  # ',' or ';'
            while separator == ",":
                table.define([str(reader.next_token()), var_type, modifier])
                separator = str(reader.next_token())

            following = str(reader.next_token())  # token after the ';'
            if following not in ("field", "static"):
                return following
            modifier = following
class CompilationEngine:
    """
    Recursive-descent compiler from a stream of Jack tokens to VM commands.

    Tokens are pulled one at a time from ``self.tokens``.  A token that was
    read only to decide what comes next is put back in front of the stream
    with ``_push_back``.  VM output goes through ``self.writer`` and
    identifiers are resolved through ``self.sym_table``.
    """

    def __init__(self, _tokens, _in_path, _out_file):
        """
        Prepare the engine; nothing is compiled until CompileClass is called.

        :param _tokens: iterable of token objects; this class reads their
            ``token`` and ``type`` attributes
        :param _in_path: path of the source file; the part of its last
            component before the first '.' prefixes generated function names
        :param _out_file: output target, stored and handed to VMWriter
        """
        self.tokens = iter(_tokens)
        # presumably os.path.split — confirm against the file's imports
        self.file_name = str(split(_in_path)[1].split('.')[0])
        self.out_file = _out_file
        self.writer = VMWriter(_out_file)
        self.sym_table = SymbolTable()
        self.class_name = ''
        self.curr_subroutine_name = ''
        # running counter used to build the L<n> labels of if/while
        self.curr_cond_index = 0

    def _next(self):
        """Consume and return the next token of the stream."""
        return next(self.tokens)

    def _push_back(self, token):
        """Put ``token`` back so that it is the next one returned by _next."""
        self.tokens = itertools.chain([token], self.tokens)

    def _new_label(self):
        """Return a fresh label name of the form L<n>."""
        label = 'L' + str(self.curr_cond_index)
        self.curr_cond_index += 1
        return label

    def CompileClass(self):
        """Compile a whole class: header, class variables, then subroutines."""
        self._next()  # class
        self.class_name = self._next().token
        self._next()  # {
        self.CompileClassVarDec()
        subroutine_kinds = [
            Keyword.FUNCTION.value, Keyword.METHOD.value,
            Keyword.CONSTRUCTOR.value
        ]
        curr_token = self._next()
        while curr_token.token in subroutine_kinds:
            self.CompileSubroutineDEC(curr_token.token)
            curr_token = self._next()

    def CompileClassVarDec(self):
        """
        Register all field/static declarations in the symbol table.

        The first token that is not 'field' or 'static' is pushed back.
        """
        sym_kind = self._next()
        while sym_kind.token in (Keyword.FIELD.value, Keyword.STATIC.value):
            sym_type = self._next()
            more_vars = True
            while more_vars:
                sym_name = self._next()
                # the token after a name is ',' (more names) or the terminator
                more_vars = self._next().token == Symbol.COMMA.value
                self.sym_table.define(sym_name.token, sym_type.token,
                                      sym_kind.token)
            sym_kind = self._next()
        self._push_back(sym_kind)

    def CompileSubroutineDEC(self, sub_type):
        """
        Compile one subroutine declaration.

        :param sub_type: text of the keyword that introduced the subroutine
            (function, method or constructor); it has already been consumed
        """
        self.sym_table.startSubroutine()
        self._next()  # return type, not needed for code generation
        self.curr_subroutine_name = self.file_name + '.' + self._next().token
        self._next()  # (
        if sub_type == constants.METHOD:
            # a method receives its object as the first argument
            self.sym_table.define('this', self.class_name, constants.ARG)
        self.CompileParameterList()
        self._next()  # )
        self.CompileSubroutineBody(sub_type)

    def CompileParameterList(self):
        """
        Register the parameters of the current subroutine as arguments.

        The closing ')' is left in the stream for the caller.
        """
        arg_type = self._next()
        if arg_type.token == Symbol.PAREN_CLOSE.value:
            # empty parameter list
            self._push_back(arg_type)
            return
        more_vars = True
        while more_vars:
            arg_name = self._next()
            curr_token = self._next()  # , or  )
            more_vars = curr_token.token == Symbol.COMMA.value
            self.sym_table.define(arg_name.token, arg_type.token,
                                  constants.ARG)
            if more_vars:
                arg_type = self._next()
            else:
                self._push_back(curr_token)

    def CompileSubroutineBody(self, sub_type):
        """
        Compile the body of a subroutine.

        The ``function`` command is written after the local declarations
        have been read, because it needs the number of local variables.
        """
        self._next()  # {
        self.CompileVarDec()
        self.writer.write_function(self.curr_subroutine_name,
                                   self.sym_table.var_count(constants.VAR))
        if sub_type == constants.METHOD:
            # bind pointer 0 to the object passed as argument 0
            self.writer.write_push(constants.ARG, 0)
            self.writer.write_pop(constants.POINTER, 0)
        if sub_type == Keyword.CONSTRUCTOR.value:
            self.CompileCtorAlloc()
        self.CompileStatements()
        self._next()  # }

    def CompileCtorAlloc(self):
        """Allocate one word per field and bind pointer 0 to the new block."""
        self.writer.write_push(constants.CONST,
                               self.sym_table.var_count(constants.FIELD))
        self.writer.write_call('Memory.alloc', 1)
        self.writer.write_pop(constants.POINTER, 0)

    def CompileVarDec(self):
        """
        Register all 'var' declarations at the start of a subroutine body.

        The first token that is not 'var' is pushed back.
        """
        curr_token = self._next()
        while curr_token.token == Keyword.VAR.value:
            var_type = self._next()  # type
            self.CompileInlineVars(var_type.token)
            curr_token = self._next()
        self._push_back(curr_token)

    def CompileInlineVars(self, var_type):
        """
        Register the names of one 'var' declaration, including its terminator.

        :param var_type: type text shared by every name in the declaration
        """
        has_more = True
        while has_more:
            var_name = self._next()  # ident
            curr_token = self._next()  # , or ;
            has_more = curr_token.token == Symbol.COMMA.value
            self.sym_table.define(var_name.token, var_type, constants.VAR)

    def CompileStatements(self):
        """
        Compile statements until a token that does not start one is met.

        That token is pushed back for the caller.
        """
        handlers = {
            Keyword.LET.value: self.CompileLet,
            Keyword.WHILE.value: self.CompileWhile,
            Keyword.RETURN.value: self.CompileReturn,
            Keyword.IF.value: self.CompileIf,
            Keyword.DO.value: self.CompileDo,
        }
        state = self._next()
        while state.token in handlers:
            handlers[state.token]()
            state = self._next()
        self._push_back(state)

    def CompileLet(self):
        """Compile a let statement; the 'let' keyword is already consumed."""
        left_value = self._next()  # var name
        segment = segment_map[self.sym_table.kind_of(left_value.token)]
        index = self.sym_table.index_of(left_value.token)
        curr_token = self._next()  # [ or =
        if curr_token.token == Symbol.BRACKET_OPEN.value:
            self.CompileArrayAccess(segment, index, True)
        else:
            self.CompileExpression()
            self.writer.write_pop(segment, index)
        self._next()  # ;

    def CompileWhile(self):
        """Compile a while statement; 'while' is already consumed."""
        loop_start = self._new_label()
        loop_exit = self._new_label()
        self._next()  # (
        self.writer.write_label(loop_start)
        self.CompileExpression()
        self._next()  # )
        # leave the loop when the condition does not hold
        self.writer.write_arithmetic(constants.NOT)
        self.writer.write_if(loop_exit)
        self._next()  # {
        self.CompileStatements()
        self.writer.write_goto(loop_start)
        self._next()  # }
        self.writer.write_label(loop_exit)

    def CompileIf(self):
        """Compile an if statement with optional else; 'if' is consumed."""
        else_label = self._new_label()
        end_label = self._new_label()
        self._next()  # (
        self.CompileExpression()
        # skip the 'then' part when the condition does not hold
        self.writer.write_arithmetic(constants.NOT)
        self.writer.write_if(else_label)
        self._next()  # )
        self._next()  # {
        self.CompileStatements()
        self.writer.write_goto(end_label)
        self.writer.write_label(else_label)
        self._next()  # }
        curr_token = self._next()
        if curr_token.token == Keyword.ELSE.value:
            self._next()  # {
            self.CompileStatements()
            self._next()  # }
        else:
            self._push_back(curr_token)
        self.writer.write_label(end_label)

    def CompileReturn(self):
        """
        Compile a return statement; 'return' is already consumed.

        A bare ``return;`` pushes constant 0 as the returned value.
        """
        curr_token = self._next()
        if curr_token.token != Symbol.SEMI_COLON.value:
            self._push_back(curr_token)
            self.CompileExpression()
            self._next()  # ;
        else:
            self.writer.write_push(constants.CONST, 0)
        self.writer.write_return()

    def CompileDo(self):
        """Compile a do statement; 'do' is already consumed."""
        self.compileSubroutineCall(True)
        self._next()  # ;

    def CompileExpression(self):
        """
        Compile an expression of the form ``term (op term)*``.

        Operators are applied strictly left to right, each one being written
        right after its right-hand term.  The first token that is not a
        binary operator is pushed back.

        Every operator of the chain is consumed here; handling only one
        ``op term`` pair would leave the second operator of ``a + b + c``
        in the stream, where the caller expects the end of the expression.
        """
        binary_operators = [operator.value for operator in Operator]
        self.CompileTerm()
        curr_token = self._next()
        while curr_token.token in binary_operators:
            self.CompileTerm()
            # NOTE(review): the operator symbol itself is handed to the
            # writer here, whereas other call sites pass command names
            self.writer.write_arithmetic(curr_token.token)
            curr_token = self._next()
        self._push_back(curr_token)

    def CompileTerm(self):
        """
        Compile a single term.

        Handles variables, array elements, subroutine calls, integer and
        string constants, ``this``, ``null``/``false``/``true``, unary
        operators and parenthesised expressions.
        """
        curr_token = self._next()
        if curr_token.type == TokenType.IDENTIFIER.value:
            # one token of look-ahead decides what the identifier stands for
            next_token = self._next()
            if next_token.token == Symbol.BRACKET_OPEN.value:
                curr_token_kind = segment_map[self.sym_table.kind_of(
                    curr_token.token)]
                curr_token_index = self.sym_table.index_of(curr_token.token)
                self.CompileArrayAccess(curr_token_kind, curr_token_index,
                                        False)
            elif next_token.token in (Symbol.PERIOD.value,
                                      Symbol.PAREN_OPEN.value):
                # restore both tokens; the call compiler re-reads them
                self._push_back(next_token)
                self._push_back(curr_token)
                self.compileSubroutineCall(False)
            else:
                self._push_back(next_token)
                self.writer.write_push(
                    segment_map[self.sym_table.kind_of(curr_token.token)],
                    self.sym_table.index_of(curr_token.token))
        elif curr_token.type == constants.TokenType.INT_CONST.value:
            self.writer.write_push(constants.CONST, str(curr_token.token))
        elif curr_token.type == constants.TokenType.STRING_CONST.value:
            self.CompileStringConstant(curr_token.token)
        elif curr_token.token == constants.Keyword.THIS.value:
            self.writer.write_push(constants.POINTER, 0)
        elif curr_token.type == constants.TokenType.KEYWORD.value:
            if curr_token.token == 'null' or curr_token.token == 'false':
                self.writer.write_push(constants.CONST, 0)
            elif curr_token.token == 'true':
                # true is produced as the negation of constant 1
                self.writer.write_push(constants.CONST, 1)
                self.writer.write_arithmetic('neg')
        else:
            if curr_token.token in [unary.value for unary in UnaryOperator]:
                self.CompileTerm()
                self.writer.write_arithmetic(
                    constants.unary_operators_map[curr_token.token])
            elif curr_token.token == Symbol.PAREN_OPEN.value:
                self.CompileExpression()
                self._next()  # )

    def CompileStringConstant(self, str_const):
        """
        Build a string object holding ``str_const``.

        :param str_const: the characters of the constant; every character
            of the value passed in is appended
        """
        self.writer.write_push(constants.CONST, len(str_const))
        self.writer.write_call('String.new', 1)
        for char in str_const:
            self.writer.write_push(constants.CONST, ord(char))
            self.writer.write_call('String.appendChar', 2)

    def CompileArrayAccess(self, arr_kind, arr_index, is_let):
        """
        Compile ``arr[expr]``; the '[' has already been consumed.

        :param arr_kind: segment holding the array variable
        :param arr_index: index of the array variable in that segment
        :param is_let: True when the element is the target of a let
            statement (``= expr`` follows and is compiled here); False when
            the element is read as a term
        """
        self.writer.write_push(arr_kind, arr_index)  # arr
        self.CompileExpression()  # arr[expres 1]
        self.writer.write_arithmetic('add')
        self._next()  # ]
        if is_let:
            self._next()  # =
            self.CompileExpression()
            # park the value in temp 0 while the element address is moved
            # into pointer 1, then store the value through 'that 0'
            self.writer.write_pop(constants.TEMP, 0)
            self.writer.write_pop(constants.POINTER, 1)
            self.writer.write_push(constants.TEMP, 0)
            self.writer.write_pop(constants.THAT, 0)
        else:
            self.writer.write_pop(constants.POINTER, 1)
            self.writer.write_push(constants.THAT, 0)

    def CompileExpressionList(self):
        """
        Compile a possibly empty, comma-separated list of expressions.

        The closing ')' is left in the stream.

        :return: number of expressions compiled
        """
        num_exprss = 0
        curr_token = self._next()
        self._push_back(curr_token)
        if curr_token.token == Symbol.PAREN_CLOSE.value:
            return num_exprss
        more_expr = True
        while more_expr:
            num_exprss += 1
            self.CompileExpression()
            curr_token = self._next()
            more_expr = curr_token.token == Symbol.COMMA.value
            if not more_expr:
                self._push_back(curr_token)
        return num_exprss

    def compileSubroutineCall(self, is_void):
        """
        Compile a subroutine call, starting at its first identifier.

        Three shapes are handled: ``var.sub(...)`` where ``var`` is in the
        symbol table (the variable is pushed as the first argument and the
        call goes to its declared type), ``Name.sub(...)`` for any other
        prefix, and ``sub(...)`` (pointer 0 is pushed as the first argument
        and the call goes to the current class).

        :param is_void: when true the returned value is popped into temp 0
        """
        num_express = 0
        prefix = self._next().token
        next_token = self._next()  # ( or .
        is_qualified = next_token.token == Symbol.PERIOD.value
        receiver_type = self.sym_table.type_of(prefix) if is_qualified else None
        if is_qualified and receiver_type is not None:
            function_name = receiver_type + '.' + self._next().token
            self._next()  # (
            self.writer.write_push(segment_map[self.sym_table.kind_of(prefix)],
                                   self.sym_table.index_of(prefix))
            num_express = 1
        elif is_qualified:
            function_name = prefix + '.' + self._next().token
            self._next()  # (
        else:
            function_name = self.class_name + '.' + prefix
            self.writer.write_push(constants.POINTER, 0)
            num_express = 1

        num_express += self.CompileExpressionList()
        self.writer.write_call(function_name, num_express)
        if is_void:
            self.writer.write_pop(constants.TEMP, 0)
        self._next()  # )
class CompilationEngine:
    """
    Compiles a list of Jack tokens into VM commands.

    Tokens are indexable pairs: position 0 holds the token type and
    position 1 the token text.  ``eat`` consumes the token at the cursor,
    the ``peek*`` methods look at upcoming tokens without consuming them.
    """

    # binary operator text (as delivered by the tokenizer) -> VM command
    all_operators = {
        "+": "add",
        "-": "sub",
        "/": "div",
        "*": "mul",
        "&amp;": "and",
        "|": "or",
        "&gt;": "gt",
        "&lt;": "lt",
        "=": "eq"
    }

    def __init__(self, tokens, out_file):
        """
        Create the engine, compile the class and close the writer.

        :param tokens: the list of tokens created by the tokenizer
        :param out_file: the output file, also handed to the VMWriter
        """
        self.__tokens = tokens
        self.__file = out_file
        self.__i = 0  # index of the next token to consume
        self.__class_symbol = SymbolTable()
        self.__subroutine_symbol = SymbolTable()
        self.__cur_token = ()
        self.__class_name = ""
        self.__writer = VMWriter(out_file)
        self.__label_count = 0
        self.compile_class()
        self.__writer.close()

    def __new_label(self):
        """Return the next unused label name, of the form L<n>."""
        label = "L%s" % self.__label_count
        self.__label_count += 1
        return label

    def eat(self):
        """
        Consume one token: it becomes the current token and the cursor
        moves to the one after it.
        """
        position = self.__i
        self.__cur_token = self.__tokens[position]
        self.__i = position + 1

    def get_token(self):
        """Return the text of the most recently consumed token."""
        current = self.__cur_token
        return current[1]

    def peek(self):
        """
        Look at the next token without consuming it.
        :return: the token text
        """
        return self.__tokens[self.__i][1]

    def peek_type(self):
        """
        Look at the next token without consuming it.
        :return: the token type
        """
        return self.__tokens[self.__i][0]

    def peek_ll2(self):
        """
        Look at the token after the next one without consuming anything.
        :return: the token text
        """
        return self.__tokens[self.__i + 1][1]

    def compile_while_stat(self):
        """
        Compile a while statement; the cursor is on 'while'.
        """
        out = self.__writer
        self.eat()  # while
        self.eat()  # (
        loop_start = self.__new_label()
        loop_exit = self.__new_label()
        out.write_label(loop_start)
        self.compile_expression()
        # leave the loop when the condition does not hold
        out.write_arithmetic("not")
        out.write_if(loop_exit)
        self.eat()  # )
        self.eat()  # {
        self.compile_statements()
        out.write_go_to(loop_start)
        self.eat()  # }
        out.write_label(loop_exit)

    def compile_return_stat(self):
        """
        Compile a return statement; the cursor is on 'return'.
        A bare return pushes constant 0 as the returned value.
        """
        self.eat()  # return
        if self.peek() != ";":
            self.compile_expression()
        else:
            self.__writer.write_push("constant", 0)
        self.__writer.write_return()
        self.eat()  # ;

    def compile_do_stat(self):
        """
        Compile a do statement; the value returned by the call is popped
        into temp 0.
        """
        self.eat()  # do
        self.compile_subroutine_call()
        self.__writer.write_pop("temp", 0)
        self.eat()  # ;

    def compile_if_stat(self):
        """
        Compile an if statement with an optional else part.
        """
        out = self.__writer
        self.eat()  # if
        self.eat()  # (
        self.compile_expression()
        out.write_arithmetic("not")
        else_label = self.__new_label()
        end_label = self.__new_label()
        # skip the 'then' part when the condition does not hold
        out.write_if(else_label)
        self.eat()  # )
        self.eat()  # {
        self.compile_statements()
        out.write_go_to(end_label)
        self.eat()  # }
        out.write_label(else_label)
        if self.peek() == "else":
            self.eat()  # else
            self.eat()  # {
            self.compile_statements()
            self.eat()  # }
        out.write_label(end_label)

    def compile_class_var_dec(self):
        """
        Compile one class-level variable declaration into the class
        symbol table.
        """
        self.eat()
        declared_kind = self.get_token()
        kind = SymbolTable.VAR if declared_kind == "var" else declared_kind
        self.var_dec_helper(kind, self.__class_symbol)

    def compile_var_dec(self):
        """
        Compile one 'var' declaration into the subroutine symbol table.
        """
        self.eat()  # var
        self.var_dec_helper(SymbolTable.VAR, self.__subroutine_symbol)

    def var_dec_helper(self, kind, symbol_table):
        """
        Read ``type name (',' name)* ';'`` and add every name to a table.

        :param kind: kind recorded for every declared name
        :param symbol_table: table receiving the declarations
        """
        self.eat()
        var_type = self.get_token()
        self.eat()
        symbol_table.add(self.get_token(), var_type, kind)
        while self.peek() != ";":
            self.eat()  # ,
            self.eat()  # next name
            symbol_table.add(self.get_token(), var_type, kind)
        self.eat()  # ;

    def compile_subroutine_body(self, func_name, func_type):
        """
        Compile a subroutine body.

        :param func_name: full name written in the function command
        :param func_type: "method", "constructor" or anything else
        """
        out = self.__writer
        locals_table = self.__subroutine_symbol
        self.eat()  # {
        while self.peek() == "var":
            self.compile_var_dec()
        # the function command needs the number of locals, so it is
        # written only after all declarations have been read
        out.write_function(func_name, locals_table.var_count(SymbolTable.VAR))
        locals_table.add("this", self.__class_name, "pointer")
        if func_type == "method":
            # bind pointer 0 to the object passed as the first argument
            out.write_push(SymbolTable.ARG, 0)
            out.write_pop("pointer", 0)
        elif func_type == "constructor":
            # allocate one word per field and bind pointer 0 to the block
            field_count = self.__class_symbol.var_count(SymbolTable.FIELD)
            out.write_push("constant", field_count)
            out.write_call("Memory.alloc", 1)
            out.write_pop("pointer", 0)
        self.compile_statements()
        self.eat()  # }

    def __add_parameter(self):
        """Consume one ``type name`` pair and record it as an argument."""
        self.eat()
        param_type = self.get_token()
        self.eat()
        param_name = self.get_token()
        self.__subroutine_symbol.add(param_name, param_type, SymbolTable.ARG)

    def compile_parameter_list(self):
        """
        Compile a possibly empty parameter list; the closing ')' is not
        consumed.
        """
        if self.peek() == ")":
            return
        self.__add_parameter()
        while self.peek() == ",":
            self.eat()  # ,
            self.__add_parameter()

    def compile_class(self):
        """
        Compile a whole class: header, variable declarations, subroutines.
        """
        self.eat()  # class
        self.eat()  # class name
        self.__class_name = self.get_token()
        self.eat()  # {

        upcoming = self.peek()
        while upcoming == "static" or upcoming == "field":
            self.compile_class_var_dec()
            upcoming = self.peek()

        while upcoming != "}":
            self.compile_subroutine_dec()
            upcoming = self.peek()
        self.eat()  # }

    def compile_expression(self):
        """
        Compile ``term (op term)*``; operators are applied left to right.
        """
        self.compile_term()
        operator = self.peek()
        while operator in CompilationEngine.all_operators:
            self.eat()
            self.compile_term()
            self.compile_operation(operator)
            operator = self.peek()

    def compile_operation(self, op):
        """
        Write the VM code for one binary operator.
        :param op: operator text; '*' and '/' become library calls
        """
        library_routine = {"*": "Math.multiply", "/": "Math.divide"}.get(op)
        if library_routine is None:
            self.__writer.write_arithmetic(CompilationEngine.all_operators[op])
        else:
            self.__writer.write_call(library_routine, 2)

    def compile_statements(self):
        """
        Compile statements until the next token does not start one.
        """
        while self.compile_statement():
            pass

    def compile_subroutine_call(self):
        """
        Compile a subroutine call, starting at its first identifier.

        ``name(...)`` calls a subroutine of the current class with pointer 0
        as first argument.  ``name.sub(...)`` pushes ``name`` first when it
        is a known variable and calls ``sub`` on the first element of its
        table entry; otherwise ``name`` itself is used as the prefix.
        """
        out = self.__writer
        self.eat()
        name = self.get_token()
        if self.peek() == "(":
            self.eat()  # (
            out.write_push("pointer", 0)
            arg_count = self.compile_expression_list()
            self.eat()  # )
            out.write_call(self.__class_name + "." + name, arg_count + 1)
            return

        self.eat()  # .
        entry = self.find(name)
        self.eat()  # subroutine name
        subroutine = self.get_token()
        self.eat()  # (
        if entry:
            out.push_val(entry)
            name = entry[0]
            arg_count = 1
        else:
            arg_count = 0

        arg_count += self.compile_expression_list()
        out.write_call(name + "." + subroutine, arg_count)
        self.eat()  # )

    def compile_expression_list(self):
        """
        Compile a possibly empty, comma-separated expression list.
        :return: the number of expressions compiled
        """
        if self.peek() == ")":
            return 0
        self.compile_expression()
        count = 1
        while self.peek() == ",":
            self.eat()  # ,
            count += 1
            self.compile_expression()
        return count

    def compile_statement(self):
        """
        Compile one statement if the next token starts one.
        :return: 1 when a statement was compiled, 0 otherwise
        """
        handlers = {
            "if": self.compile_if_stat,
            "while": self.compile_while_stat,
            "do": self.compile_do_stat,
            "return": self.compile_return_stat,
            "let": self.compile_let_stat,
        }
        handler = handlers.get(self.peek())
        if handler is None:
            return 0  # no more statements to compile
        handler()
        return 1

    def compile_let_stat(self):
        """
        Compile a let statement, for a plain variable or an array element.
        """
        self.eat()  # let
        self.eat()  # variable name
        entry = self.find(self.get_token())
        segment = entry[1]
        index = entry[2]
        if segment == "field":
            segment = "this"

        if self.peek() == "[":
            self.compile_array(segment, index)
        else:
            self.eat()  # =
            self.compile_expression()
            self.__writer.write_pop(segment, index)
        self.eat()  # ;

    def compile_subroutine_dec(self):
        """
        Compile one subroutine declaration and then start a fresh
        subroutine symbol table.
        """
        self.eat()
        func_type = self.get_token()
        self.eat()  # return type
        self.eat()  # subroutine name
        func_name = self.__class_name + "." + self.get_token()
        self.eat()  # (
        if func_type == "method":
            self.__subroutine_symbol.add("this", self.__class_name,
                                         SymbolTable.ARG)
        self.compile_parameter_list()
        self.eat()  # )
        self.compile_subroutine_body(func_name, func_type)
        self.__subroutine_symbol = SymbolTable()

    def __compile_keyword_term(self):
        """Compile a keyword used as a term and consume it."""
        out = self.__writer
        word = self.peek()
        if word == "null" or word == "false":
            out.write_push("constant", 0)
        elif word == "true":
            # true is produced as the bitwise complement of 0
            out.write_push("constant", 0)
            out.write_arithmetic("not")
        elif word == "this":
            out.write_push("pointer", 0)
        self.eat()

    def __compile_string_term(self):
        """Build a string object from a string constant and consume it."""
        out = self.__writer
        text = self.peek()
        substitutions = (('\t', "\\t"), ('\n', "\\n"),
                         ('\r', "\\r"), ('\b', "\\b"))
        for raw, spelled_out in substitutions:
            text = text.replace(raw, spelled_out)
        out.write_push("constant", len(text))
        out.write_call("String.new", 1)
        for ch in text:
            out.write_push("constant", ord(ch))
            out.write_call("String.appendChar", 2)
        self.eat()

    def __compile_identifier_term(self):
        """Compile a term that starts with a name."""
        out = self.__writer
        lookahead = self.peek_ll2()
        if lookahead == "[":
            # array element: base + index, then read through 'that'
            self.eat()  # array name
            out.push_val(self.find(self.get_token()))
            self.eat()  # [
            self.compile_expression()
            out.write_arithmetic("add")
            out.write_pop("pointer", 1)
            out.write_push("that", 0)
            self.eat()  # ]
        elif lookahead in (".", "("):
            self.compile_subroutine_call()
        else:
            self.eat()  # varName
            out.push_val(self.find(self.get_token()))

    def compile_term(self):
        """
        Compile a single term: constant, keyword, parenthesised
        expression, unary operation, array element, call or variable.
        """
        token_type = self.peek_type()
        if token_type == JackTokenizer.INT_CONST:
            self.__writer.write_push("constant", self.peek())
            self.eat()
        elif token_type == JackTokenizer.KEYWORD:
            self.__compile_keyword_term()
        elif token_type == JackTokenizer.STR_CONST:
            self.__compile_string_term()
        else:
            head = self.peek()
            if head == "(":
                self.eat()  # (
                self.compile_expression()
                self.eat()  # )
            elif head in ("-", "~"):
                self.eat()
                self.compile_term()
                self.__writer.write_arithmetic("neg" if head == "-" else "not")
            else:
                self.__compile_identifier_term()

    def find(self, name):
        """
        Look a name up in the subroutine table first, then in the class
        table; the class table's answer is returned as is when the
        subroutine table has no entry.
        """
        return (self.__subroutine_symbol.get_data(name)
                or self.__class_symbol.get_data(name))

    def compile_array(self, kind, index):
        """
        Compile an assignment to an array element; the cursor is on '['.
        :param kind: segment of the array variable
        :param index: index of the array variable in that segment
        """
        out = self.__writer
        self.eat()  # [
        self.compile_expression()
        self.eat()  # ]
        out.write_push(kind, index)
        out.write_arithmetic("add")
        self.eat()  # =
        self.compile_expression()
        # park the value in temp 0 while the element address is moved
        # into pointer 1, then store the value through 'that 0'
        out.write_pop("temp", 0)
        out.write_pop("pointer", 1)
        out.write_push("temp", 0)
        out.write_pop("that", 0)
Esempio n. 12
0
class CompilationEngine:
    def __init__(self, tokenizer: JackTokenizer, jack_file):
        """
        Prepare the engine for one source file.

        Two files are opened for writing, both named after ``jack_file.name``
        with '.jack' replaced: '<name>_engine.xml' (parse log) and
        '<name>.vm' (handed to the VMWriter).

        :param tokenizer: token source used by ``advance`` and ``next``
        :param jack_file: object exposing the source path as ``name``
        """
        source_path = jack_file.name
        xml_path = source_path.replace('.jack', '_engine.xml')
        vm_path = source_path.replace('.jack', '.vm')

        self.tokenizer = tokenizer
        self.class_name = ''
        self.log_file = open(xml_path, 'w')
        self.output_file = open(vm_path, 'w')
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(self.output_file)
        # running counters that make the while / if labels unique
        self.while_label_index = 0
        self.if_label_index = 0

    def compile(self):
        self.compile_class(0)

    def advance(self):
        """return current token"""
        return self.tokenizer.advance()

    def next(self) -> Token:
        return self.tokenizer.next()

    def compile_token(self, token, indentation, limits=None):
        print(token.content, end='  ')
        if limits is not None:
            if isinstance(limits, list) and token.token_type not in limits:
                raise RuntimeError(token, 'can be only', limits)
            if isinstance(limits, str) and token.content != limits:
                raise RuntimeError(token, 'can be only', limits)
        self.log(token, indentation)

    def log_node(self, msg, indentation):
        space = ''
        for i in range(0, indentation):
            space += '  '
        self.log_file.write('{1}<{0}>\n'.format(msg, space))

    def log(self, token, indentation):
        txt = token.content
        if txt == '<':
            txt = '&lt;'
        elif txt == '>':
            txt = '&gt;'
        elif txt == '\"':
            txt = '&quot;'
        elif txt == '&':
            txt = '&amp;'
        space = ''
        for i in range(0, indentation):
            space += '  '  # 2 spaces
        self.log_file.write('{2}<{0}> {1} </{0}>\n'.format(
            token.token_type, txt, space))

    def compile_class(self, indentation):
        """
        Compiles a complete class.
        """
        self.log_file.write('<class>\n')
        # 'class'
        advance = self.advance()
        self.compile_token(advance, indentation + 1)
        # class name
        advance = self.advance()
        self.class_name = advance.content
        self.compile_token(advance, indentation + 1)
        # set class name to vm-writer
        self.vm_writer.set_class_name(advance.content)
        # {
        advance = self.advance()
        self.compile_token(advance, indentation + 1, "{")
        # classVarDec* subroutineDec*
        advance = self.advance()
        while advance.content != '}':
            if (advance.content == 'constructor'
                    or advance.content == 'function'
                    or advance.content == 'method'):
                self.compile_subroutine(advance, indentation + 1)
            elif advance.content in ['field', 'static']:
                self.compile_class_var_dec(advance, indentation + 1)
            elif advance.content != '}':
                raise RuntimeError(
                    advance,
                    'Only subroutine and variable can be declared here')
            advance = self.advance()
        # }
        self.compile_token(advance, indentation + 1, '}')
        self.log_file.write('</class>\n')
        self.log_file.flush()
        print("\ncompilation success")
        return

    def compile_class_var_dec(self, token, indentation):
        """
        Compile a static or field declaration:
        ``('static' | 'field') type varName (',' varName)* ';'``.

        Every declared name is defined in the symbol table with the shared
        type and the upper-cased declaration keyword as its kind.

        :param token: the 'static'/'field' token, already read by the caller
        :param indentation: nesting depth used for the XML log
        """
        inner = indentation + 1
        self.log_node('classVarDec', indentation)

        # static or field
        kind = token.content.upper()
        self.compile_token(token, inner)

        # type
        type_token = self.advance()
        var_type = type_token.content
        self.compile_token(type_token, inner, [IDENTIFIER, KEYWORD])

        # first var name
        name_token = self.advance()
        self.compile_token(name_token, inner, [IDENTIFIER])
        self.symbol_table.define(name_token.content, var_type, kind)

        # (',' varName)*
        separator = self.advance()
        while separator.content == ',':
            self.compile_token(separator, inner, ',')
            name_token = self.advance()
            self.symbol_table.define(name_token.content, var_type, kind)
            self.compile_token(name_token, inner, [IDENTIFIER])
            separator = self.advance()

        # ;
        self.compile_token(separator, inner, ';')
        self.log_node('/classVarDec', indentation)
        return

    def compile_subroutine(self, token, indentation):
        """
        Compiles a complete method, function, or constructor.
        """
        # reset symbol table for subroutine
        self.symbol_table.start_subroutine()

        self.log_node('subroutineDec', indentation)
        # function/method/constructor
        function_type = token.content
        self.compile_token(token, indentation + 1)
        # void | type
        token = self.advance()
        self.compile_token(token, indentation + 1)
        # subroutine name
        token = self.advance()
        subroutine_name = token.content
        self.compile_token(token, indentation + 1)
        # (
        token = self.advance()
        self.compile_token(token, indentation + 1)
        # parameter list exists
        if function_type == 'method':
            self.symbol_table.define('this_placeholder', "THIS", ARG)
            pass
        token = self.advance()
        self.compile_parameter_list(token, indentation + 1)
        if token.content != ')':
            token = self.advance()
        # )
        self.compile_token(token, indentation + 1, ')')
        #  {
        token = self.advance()
        self.compile_subroutine_body(token, indentation + 1, subroutine_name,
                                     function_type)
        self.log_node('/subroutineDec', indentation)
        return

    def compile_subroutine_body(self,
                                token,
                                indentation,
                                subroutine_name,
                                function_type='function'):
        self.log_node('subroutineBody', indentation)
        self.compile_token(token, indentation + 1, '{')
        token = self.advance()
        n_locals = 0
        if token.content == 'var':
            n_locals = self.compile_var_dec(token, indentation + 1)
            token = self.advance()
        self.vm_writer.write_functions(subroutine_name, n_locals)

        # todo 处理constructor
        if function_type == 'constructor':
            # number of fields
            self.vm_writer.write_push('CONST',
                                      self.symbol_table.var_count(FIELD))
            self.vm_writer.write_call('Memory.alloc', 1)
            self.vm_writer.write_pop('POINTER', 0, 'set this pointer')
        elif function_type == 'method':
            # if it is a method, always set arg 0 to pointer 0(this)
            self.vm_writer.write_push(ARG, 0)
            self.vm_writer.write_pop('POINTER', 0)
            pass

        # if this token is '}' means the function has an empty body
        if token.content == '}':
            # TODO 空函数体的处理
            # empty body
            print('empty body', token)
            pass
        else:
            self.compile_statements(token, indentation + 1)
            token = self.advance()
        self.compile_token(token, indentation + 1, '}')
        self.log_node('/subroutineBody', indentation)

    def compile_parameter_list(self, token, indentation):
        """Compiles a (possibly empty) parameter list, not including the enclosing ‘‘ () ’’."""
        self.log_node('parameterList', indentation)
        while token.content != ')':
            param_symbol = Symbol()
            param_symbol.kind = ARG
            # parameter type
            self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD])
            param_symbol.symbol_type = token.content
            # parameter name
            token = self.advance()
            self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD])
            param_symbol.name = token.content
            self.symbol_table.define_symbol(param_symbol)
            if self.next() is not None and self.next().content == ',':
                # compile ,
                token = self.advance()
                self.compile_token(token, indentation + 1)
                token = self.advance()
                continue
            elif self.next() is not None and self.next().content == ')':
                # this function does not consumes ')' so didn't call advance()
                break
            else:
                token = self.advance()
        self.log_node('/parameterList', indentation)
        return

    def compile_var_dec(self, token, indentation) -> int:
        """  Compiles a var declaration."""

        # var_symbol = Symbol()
        # # var
        # self.compile_token(token, indentation + 1, 'var')
        # var_symbol.kind = VAR
        # # var type
        # token = self.advance()
        # self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD])
        # var_symbol.symbol_type = token.content
        # # var name
        # token = self.advance()
        # self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD])
        # var_symbol.name = token.content
        # # , or ;
        # token = self.advance()
        # while token.content != ';':
        #     self.compile_token(token, indentation + 1, ',')
        #     token = self.advance()
        #     self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD])
        #     token = self.advance()
        # self.compile_token(token, indentation + 1, ';')
        var_count = 0
        while token.content == 'var':
            self.log_node('varDec', indentation)
            var_count += 1
            var_symbol = Symbol()
            # var
            self.compile_token(token, indentation + 1, 'var')
            var_symbol.kind = VAR
            # var type
            token = self.advance()
            self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD])
            var_symbol.symbol_type = token.content
            # var name
            token = self.advance()
            self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD])
            var_symbol.name = token.content
            self.symbol_table.define_symbol(var_symbol)
            # next token may be ',' or ';'
            token = self.advance()
            # if next token is ','
            while token.content == ',':
                var_count += 1
                self.compile_token(token, indentation + 1, ',')
                # var name
                token = self.advance()
                self.compile_token(token, indentation + 1, [IDENTIFIER])
                # only name differs, types are the same
                self.symbol_table.define(token.content, var_symbol.symbol_type,
                                         VAR)
                token = self.advance()
            if token.content == ';':
                self.compile_token(token, indentation + 1, ';')
            if self.next().content == 'var':
                token = self.advance()
            self.log_node('/varDec', indentation)
        return var_count

    def compile_statements(self, token, indentation):
        """Compiles a sequence of statements, not including the enclosing ‘‘{}’’."""
        self.log_node('statements', indentation)
        while token.content != '}':
            if token.content == 'let':
                self.compile_let(token, indentation + 1)
                pass
            elif token.content == 'if':
                self.compile_if(token, indentation + 1)
                pass
            elif token.content == 'while':
                self.compile_while(token, indentation + 1)
                pass
            elif token.content == 'do':
                self.compile_do(token, indentation + 1)
                pass

            elif token.content == 'return':
                self.compile_return(token, indentation + 1)
                pass
            else:
                raise RuntimeError('unknown type in statements %s')
            if self.next() is not None and self.next().content == '}':
                break
            else:
                token = self.advance()
        self.log_node('/statements', indentation)
        return

    def compile_do(self, token: Token, indentation):
        self.log_node('doStatement', indentation)
        self.compile_token(token, indentation + 1, 'do')
        token = self.advance()
        self.compile_term(token, indentation + 1, do_term=True)
        self.vm_writer.write_pop('TEMP', 0, 'do call')
        token = self.advance()
        self.compile_token(token, indentation + 1, ';')
        # maybe a local subroutine or someone else's
        # token = self.advance()
        # self.compile_token(token, indentation + 1, [IDENTIFIER])
        # function_class_name = token.content
        # token = self.advance()
        # if token.content == '.':
        #     # someone else 's
        #     self.compile_token(token, indentation + 1, '.')
        #     token = self.advance()
        #     self.compile_token(token, indentation + 1, [IDENTIFIER])
        #     function_name = token.content
        #     token = self.advance()
        #     self.compile_token(token, indentation + 1, '(')
        #     token = self.advance()
        #     n_arg = self.compile_expression_list(token, indentation + 1)
        #     self.vm_writer.write_call(function_class_name + '.' + function_name, n_arg)
        #     # do calls must 'pop temp 0', because void functions always returns 0
        #     self.vm_writer.write_pop('TEMP', 0, 'do call')
        #     if token.content != ')':
        #         token = self.advance()
        #     self.compile_token(token, indentation + 1, ')')
        #     pass
        # else:
        #     self.compile_token(token, indentation + 1, '(')
        #     token = self.advance()
        #     self.compile_expression_list(token, indentation + 1)
        #     if token.content != ')':
        #         token = self.advance()
        #     self.compile_token(token, indentation + 1, ')')
        #     # local method
        #     pass
        # token = self.advance()
        # self.compile_token(token, indentation + 1, ';')
        self.log_node('/doStatement', indentation)
        return

    def compile_let(self, token: Token, indentation):
        """
        Compile ``let varName ('[' expression ']')? = expression ;``.

        For a plain variable the value is popped into the segment matching the
        variable's kind. For an array element the target address
        (index + base) is left on the stack while the right-hand side is
        evaluated, then the value is stored with the temp 0 / pointer 1 /
        that 0 sequence. The address is deliberately not parked in a temp
        slot: the temp segment is shared by every subroutine, so a call on
        the right-hand side could overwrite it.

        :param token: the 'let' token, already read by the caller
        :param indentation: nesting depth used for the XML log
        """
        inner = indentation + 1
        self.log_node('letStatement', indentation)
        # let
        self.compile_token(token, inner, 'let')
        # target variable
        token = self.advance()
        self.compile_token(token, inner, [IDENTIFIER])
        var_name = token.content
        # = or [
        token = self.advance()
        array = token.content == '['
        if array:
            self.compile_token(token, inner, '[')
            token = self.advance()
            # e.g. x[y]: push y
            self.compile_expression(token, inner)
            token = self.advance()
            self.compile_token(token, inner, ']')
            token = self.advance()
            # push x, then add: the address of x[y] is now on top of the stack
            self.write_push(var_name)
            self.vm_writer.write_arithmetic('ADD')
        self.compile_token(token, inner, '=')
        # right-hand side expression
        token = self.advance()
        self.compile_expression(token, inner)
        if array:
            # stack: ..., address, value
            self.vm_writer.write_pop('TEMP', 0)
            self.vm_writer.write_pop('POINTER', 1)
            self.vm_writer.write_push('TEMP', 0)
            self.vm_writer.write_pop('THAT', 0)
        else:
            kind = self.symbol_table.kind_of(var_name)
            if kind == VAR:
                segment = 'LOCAL'
            elif kind == ARG:
                segment = 'ARG'
            elif kind == FIELD:
                segment = 'THIS'
            elif kind == STATIC:
                segment = 'STATIC'
            else:
                # unknown name: nothing is emitted
                segment = None
            if segment is not None:
                self.vm_writer.write_pop(segment,
                                         self.symbol_table.index_of(var_name),
                                         var_name)
        # ;
        token = self.advance()
        self.compile_token(token, inner, ';')
        self.log_node('/letStatement', indentation)
        return

    def write_push(self, var_name):
        """
        Push the value of a named variable onto the VM stack.

        The segment is chosen from the variable's kind in the symbol table:
        VAR -> 'LOCAL', ARG -> 'ARG', FIELD -> 'THIS', STATIC -> 'STATIC'.
        A name of any other kind emits nothing.

        :param var_name: name of the variable to push
        """
        kind = self.symbol_table.kind_of(var_name)
        if kind == VAR:
            segment = 'LOCAL'
        elif kind == ARG:
            segment = 'ARG'
        elif kind == FIELD:
            segment = 'THIS'
        elif kind == STATIC:
            # static variables (e.g. a static array base) are pushed too
            segment = 'STATIC'
        else:
            return
        self.vm_writer.write_push(segment,
                                  self.symbol_table.index_of(var_name),
                                  var_name)

    def compile_while(self, token: Token, indentation):
        while_label_pre = 'WHILE_%s' % self.while_label_index
        # label index++
        self.while_label_index += 1
        self.vm_writer.write_label('%s_EXP' % while_label_pre)
        self.log_node('whileStatement', indentation)
        self.compile_token(token, indentation + 1, 'while')
        token = self.advance()
        self.compile_token(token, indentation + 1, '(')
        token = self.advance()
        self.vm_writer.write_comment("calculating while condition expression")
        # expression
        self.compile_expression(token, indentation + 1)
        # )
        token = self.advance()
        self.compile_token(token, indentation + 1, ')')
        self.vm_writer.write_arithmetic('NOT')
        # checking condition expression
        self.vm_writer.write_if('%s_END' % while_label_pre)
        # {
        token = self.advance()
        self.compile_token(token, indentation + 1, '{')
        # statements
        token = self.advance()
        if token.content != '}':
            # not empty statement
            self.compile_statements(token, indentation + 1)
            # }
            token = self.advance()
        self.compile_token(token, indentation + 1, '}')
        self.vm_writer.write_goto('%s_EXP' % while_label_pre)
        self.vm_writer.write_label('%s_END' % while_label_pre)
        self.log_node('/whileStatement', indentation)
        return

    def compile_return(self, token: Token, indentation):
        self.log_node('returnStatement', indentation)
        self.compile_token(token, indentation + 1, 'return')
        token = self.advance()
        if token.content != ';':
            self.compile_expression(token, indentation + 1)
            token = self.advance()
            self.vm_writer.write_return()
        else:
            # for functions that return void, it must return an integer 0
            self.vm_writer.write_return(True)
            pass
        self.compile_token(token, indentation + 1, ';')
        self.log_node('/returnStatement', indentation)
        return

    def compile_if(self, token: Token, indentation):
        # if_label_pre = 'IF_%s' % self.if_label_index
        else_label = 'ELSE_%s' % self.if_label_index
        finish_label = 'FINISH_%s' % self.if_label_index
        # label index++
        self.if_label_index += 1

        self.log_node('ifStatement', indentation)
        self.compile_token(token, indentation + 1, 'if')
        token = self.advance()
        self.compile_token(token, indentation + 1, '(')
        self.vm_writer.write_comment("calculating if condition expression")
        token = self.advance()
        # expression
        self.compile_expression(token, indentation + 1)
        # )
        token = self.advance()
        self.compile_token(token, indentation + 1, ')')
        self.vm_writer.write_arithmetic('NOT')
        self.vm_writer.write_if(else_label)
        # {
        token = self.advance()
        self.compile_token(token, indentation + 1, '{')
        # statements
        token = self.advance()
        if token.content != '}':
            # not empty statement
            self.compile_statements(token, indentation + 1)
            # }
            token = self.advance()
        self.compile_token(token, indentation + 1, '}')
        if self.next().content == 'else':
            """
            if statements...
            (else vm code)
            goto FINISH // if statements finished, pass the else code
            lable ELSE
            else statements...
            label FINISH
            """
            self.vm_writer.write_goto(finish_label)
            self.vm_writer.write_label(else_label)
            token = self.advance()
            self.compile_token(token, indentation + 1, 'else')
            token = self.advance()
            self.compile_token(token, indentation + 1, '{')
            token = self.advance()
            self.compile_statements(token, indentation + 1)
            token = self.advance()
            self.compile_token(token, indentation + 1, '}')
            self.vm_writer.write_label(finish_label)
        else:
            """
            if statements...
            (no else vm code)
            label ELSE
            """
            self.vm_writer.write_label(else_label)
            pass
        self.log_node('/ifStatement', indentation)
        return

    def compile_expression(self, token, indentation):
        """
        Compile ``term (op term)*``.

        Operators are applied strictly left to right: each operator's VM
        command is written right after its right-hand term has been compiled.

        :param token: first token of the expression, already read by the caller
        :param indentation: nesting depth used for the XML log
        """
        inner = indentation + 1
        self.log_node('expression', indentation)
        self.compile_term(token, inner)
        while True:
            upcoming = self.next()
            if upcoming is None or upcoming.content not in OP_SYMBOLS.keys():
                break
            operator = self.advance()
            self.compile_token(operator, inner, [SYMBOL])
            vm_command = OP_SYMBOLS[operator.content]
            operand = self.advance()
            self.compile_term(operand, inner)
            # call op function after pushing the second operand
            self.vm_writer.write_arithmetic(vm_command)
        self.log_node('/expression', indentation)
        return

    def compile_term(self, token: Token, indentation, do_term=False):
        """
        Compile a single term and emit the VM code that pushes its value.

        The branch is chosen from ``token`` itself and, for identifiers, from
        the token that follows it: ``[`` is an array access, ``(`` a call on
        the current object, ``.`` a qualified call, anything else a plain
        variable.

        :param token: first token of the term, already read by the caller
        :param indentation: nesting depth used for the XML log
        :param do_term: when true the ``<term>`` log nodes are omitted (the
            ``do`` statement compiles its call through this method that way)
        :raises RuntimeError: if the token starts no recognised term
        """
        inner = indentation + 1
        if not do_term:
            self.log_node('term', indentation)
        if token.token_type == INT_CONST:
            self.compile_token(token, inner, [INT_CONST])
            self.vm_writer.write_push('CONST', token.content)
        elif token.token_type == STRING_CONST:
            # Build the string at run time: String.new(length), then one
            # String.appendChar per character; the string's address stays on
            # the stack. e.g. for a 3-char string:
            #   push constant 3 / call String.new 1
            #   push constant 72 / call String.appendChar 2  (and so on)
            length = len(token.content)
            self.vm_writer.write_push('CONST', length)
            self.vm_writer.write_call('String.new', 1)
            for c in token.content:
                self.vm_writer.write_push('CONST', ord(c))
                self.vm_writer.write_call('String.appendChar', 2)
            self.compile_token(token, inner)
        # keyword constants
        elif token.content == 'true':
            # true is emitted as the negation of 1
            self.compile_token(token, inner)
            self.vm_writer.write_push('CONST', 1)
            self.vm_writer.write_arithmetic('NEG')
        elif token.content in ('false', 'null'):
            self.compile_token(token, inner)
            self.vm_writer.write_push('CONST', 0)
        elif token.content == 'this':
            self.compile_token(token, inner)
            self.vm_writer.write_push('POINTER', 0)
        elif self.next().content == '[':
            # varName '[' expression ']'
            self.compile_token(token, inner, [IDENTIFIER])
            self.write_push(token.content)
            token = self.advance()
            self.compile_token(token, inner, '[')
            token = self.advance()
            self.compile_expression(token, inner)
            token = self.advance()
            self.compile_token(token, inner, ']')
            # address = base + index; read the cell through THAT 0
            self.vm_writer.write_arithmetic('ADD')
            self.vm_writer.write_pop('POINTER', 1)
            self.vm_writer.write_push('THAT', 0)
        elif token.content == '(':
            # '(' expression ')'
            self.compile_token(token, inner, '(')
            token = self.advance()
            self.compile_expression(token, inner)
            if token.content != ')':
                token = self.advance()
            self.compile_token(token, inner, ')')
        elif token.content in UNARY_OP_SYMBOL.keys():
            # unaryOp term: the operand is compiled first, then the operator
            self.compile_token(token, inner)
            unary_op = UNARY_OP_SYMBOL[token.content]
            token = self.advance()
            self.compile_term(token, inner)
            self.vm_writer.write_arithmetic(unary_op)
        elif self.next().content == '(':
            # subroutineName '(' expressionList ')': call on the current
            # object, so pointer 0 is pushed as the first argument
            n_arg = 1
            self.vm_writer.write_push('POINTER', 0)
            function_class_name = self.class_name
            function_name = token.content
            self.compile_token(token, inner, [IDENTIFIER])
            token = self.advance()
            self.compile_token(token, inner, '(')
            token = self.advance()
            n_arg += self.compile_expression_list(token, inner)
            self.vm_writer.write_call(
                function_class_name + '.' + function_name, n_arg)
            # the expression list leaves ')' unread unless it was empty
            if token.content != ')':
                token = self.advance()
            self.compile_token(token, inner, ')')
        elif self.next().content == '.':
            # (className | varName) '.' subroutineName '(' expressionList ')'
            n_arg = 0
            function_class_name = token.content
            if self.symbol_table.index_of(function_class_name) > -1:
                # The qualifier is a known variable: push it as the first
                # argument and call through its declared type.
                n_arg += 1
                self.vm_writer.write_push(
                    self.symbol_table.kind_of(function_class_name),
                    self.symbol_table.index_of(function_class_name),
                    function_class_name)
                function_class_name = self.symbol_table.type_of(
                    function_class_name)
            self.compile_token(token, inner, [IDENTIFIER])
            token = self.advance()
            self.compile_token(token, inner, '.')
            # function name
            token = self.advance()
            function_name = token.content
            self.compile_token(token, inner, [IDENTIFIER])
            token = self.advance()
            self.compile_token(token, inner, '(')
            token = self.advance()
            n_arg += self.compile_expression_list(token, inner)
            self.vm_writer.write_call(
                function_class_name + '.' + function_name, n_arg)
            # the expression list leaves ')' unread unless it was empty
            if token.content != ')':
                token = self.advance()
            self.compile_token(token, inner, ')')
        elif token.token_type == IDENTIFIER:
            # plain varName
            self.compile_token(token, inner, [IDENTIFIER])
            kind = self.symbol_table.kind_of(token.content)
            if kind == VAR:
                self.vm_writer.write_push(
                    'LOCAL', self.symbol_table.index_of(token.content),
                    token.content)
            elif kind == ARG:
                self.vm_writer.write_push(
                    'ARG', self.symbol_table.index_of(token.content),
                    token.content)
            elif kind == FIELD:
                # NOTE(review): other code paths push fields via 'THIS';
                # confirm that the VM writer treats 'FIELD' the same way.
                self.vm_writer.write_push(
                    'FIELD', self.symbol_table.index_of(token.content),
                    token.content)
            elif kind == STATIC:
                self.vm_writer.write_push(
                    'STATIC', self.symbol_table.index_of(token.content),
                    token.content)
        else:
            raise RuntimeError("Uncaught situation", token)
        if not do_term:
            self.log_node('/term', indentation)
        return

    def compile_expression_list(self, token: Token, indentation) -> int:
        self.log_node('expressionList', indentation)
        n_expression = 0
        while token.content != ')':
            n_expression += 1
            self.compile_expression(token, indentation + 1)
            if self.next() is not None and self.next().content == ',':
                # multiple expression list
                token = self.advance()
                self.compile_token(token, indentation + 1, ',')
                token = self.advance()
            elif self.next() is not None and self.next().content == ')':
                break
            else:
                print('UNEXPECTED token in compile_expression_list', token)
                token = self.advance()
        self.log_node('/expressionList', indentation)
        return n_expression
class CompilationEngine:

    def __init__(self, input_stream: str, jack_tokenizer: JackTokenizer):
        """
        creates a new compilation engine with the given
        input and output.
        :param input_stream: path of the jack file; the base name for the
        vm writer and the xml output name are derived from it by dropping
        a trailing ".jack"
        :param jack_tokenizer: given jack tokenizer
        """
        self.tokenizer = jack_tokenizer
        self.tokens = jack_tokenizer.get_tokens()

        # Strip only a trailing ".jack". A blanket replace() also rewrites
        # ".jack" inside directory names, and for a path without the
        # extension it leaves the output name equal to the input path, so
        # opening it with "wb" below would truncate the source file.
        jack_suffix = ".jack"
        if input_stream.endswith(jack_suffix):
            base_name = input_stream[:-len(jack_suffix)]
        else:
            base_name = input_stream
        self.file_name = base_name
        self.output_file_name = base_name + ".xml"
        self.output_file = open(self.output_file_name, "wb")
        self.current_class_name = None
        self.root = None
        self.label_counter = 0
        self.tree = None

        # ----- identifier type, project 11, Wednesday -------- #
        self.identifier_counter = {LOCAL: 0,
                                   ARGUMENT: 0,
                                   STATIC: 0,
                                   FIELD: 0}
        # ----------------------------------------------------- #

        self.symbol_table = SymbolTable()
        # created by compile_class
        self.VMWriter = None

    def compile(self) -> None:
        """
        method to compile jack file and close file afterwards
        :return: none
        """
        self.tokenizer.advance()
        self.compile_class()
        self.output_file.close()

    def compile_class(self) -> None:
        """
        compiles a class
        ------------------------
        class className { classVarDec* subroutineDec* }
        ------------------------
        :return: None
        :raises ValueError: if a token of the class body starts neither a
        class variable declaration nor a subroutine
        """

        # create VMWriter for current class
        self.VMWriter = VMWriter(self.file_name)

        # was class
        self.tokenizer.advance()
        # now name

        # current class name :
        self.current_class_name = self.tokenizer.get_current_token()[1]

        # was name
        self.tokenizer.advance()
        # now {

        # was {
        self.tokenizer.advance()
        # now class body

        while self.tokenizer.has_more_tokens():
            token_string = self.tokenizer.get_current_token()[1]
            if CompilationEngine.is_class_field(token_string):
                self.compile_class_var_declaration()
            elif CompilationEngine.is_subroutine(token_string):
                self.compile_subroutine()
            else:
                # Nothing consumed this token, so another pass of the loop
                # would see it again and never finish. Fail loudly instead.
                raise ValueError(
                    "unexpected token in class body: " + repr(token_string))

        # step over the last "}" that ends the class
        self.tokenizer.advance()

    @staticmethod
    def is_subroutine(token: str) -> bool:
        """
        method to check if token is subroutine
        :param token: string of current token
        :return: true if subroutine declaration, false otherwise
        """
        return ((token == "constructor") or (token == "function") or (
                token == "method"))

    @staticmethod
    def is_var_declare(token: str) -> bool:
        return token == "var"

    @staticmethod
    def is_class_field(token: str) -> bool:
        """
        method to check if token is class field
        :param token: string of current token
        :return: true if class field declaration, false otherwise
        """
        return (token == "static") or (token == "field")

    @staticmethod
    def is_statement(token: str) -> bool:
        """
        tells whether a token opens a statement
        :param token: string of current token
        :return: true for the let, if, while, do and return keywords
        """
        return token in (LET, IF, WHILE, DO, RETURN)

    def insert_next_token(self, root) -> None:
        """
        appends the current token to root as a child element named after
        the token type, then moves the tokenizer one token forward
        :param root: element that receives the new child
        :return: none
        """
        token = self.tokenizer.get_current_token()
        tag, text = token[0], token[1]

        # string constants are stored without their first and last char
        if tag == JackTokenizer.STRING_TYPE:
            text = text[1:-1]

        child = etree.SubElement(root, tag)
        child.text = " " + text + " "
        self.tokenizer.advance()

    def compile_class_var_declaration(self) -> None:
        """
        compiles a class variable declaration
        ------------------------
        (static | field) type varName (, varName)* ;
        ------------------------
        every declared name is counted in identifier_counter and defined
        in the symbol table with the type and kind of the line
        :return: None
        """
        # was field | static
        kind = self.tokenizer.get_current_token()[1]
        self.tokenizer.advance()
        # now type

        # the type is shared by every name of the line
        type_var = self.tokenizer.get_current_token()[1]
        self.tokenizer.advance()
        # now first varName

        # whatever is not static is counted as a class field
        counter_key = STATIC if kind == STATIC else FIELD

        while True:
            # varName
            name = self.tokenizer.get_current_token()[1]
            self.identifier_counter[counter_key] += 1
            self.symbol_table.define(name, type_var, kind)
            self.tokenizer.advance()
            # now , or ;

            if self.tokenizer.current_word != COMMA:
                break

            # was ,
            self.tokenizer.advance()
            # now next varName

        # was ;
        self.tokenizer.advance()

    def compile_subroutine(self) -> None:
        """
        compiles one complete subroutine declaration
        ------------------------
        (constructor | function | method) returnType name
        ( parameterList ) { body }
        ------------------------
        :return: None
        """
        tokenizer = self.tokenizer

        # every subroutine starts from a fresh subroutine scope
        self.symbol_table.start_subroutine()

        # constructor | function | method
        subroutine_type = tokenizer.get_current_token()[1]
        if subroutine_type == METHOD:
            # a method gets "this" as an argument entry before its
            # declared parameters
            self.symbol_table.define(THIS, self.current_class_name, ARGUMENT)

        # was subroutine kind
        tokenizer.advance()
        # was return type
        tokenizer.advance()

        # now subroutine name, written as className.subroutineName
        qualified_name = (self.current_class_name + DOT
                          + tokenizer.get_current_token()[1])

        # was name
        tokenizer.advance()
        # now (

        # defines the parameters in the symbol table, stops on )
        self.compile_parameter_list()

        # was )
        tokenizer.advance()
        # now {

        self.compile_subroutine_body(qualified_name, subroutine_type)

        # was }
        tokenizer.advance()

    def compile_subroutine_body(self, subroutine_name: str,
                                subroutine_type: str):
        """
        compiles the body of a subroutine
        ------------------------
        { varDec* statements }
        ------------------------
        writes the function line and, for constructors and methods,
        the code that sets pointer 0 before the statements
        :param subroutine_name: name written in the function line
        :param subroutine_type: kind of the subroutine as read from source
        :return: None
        """
        # amount of fields counted by the symbol table; a constructor
        # pushes it as the argument of the allocation call
        field_count = self.symbol_table.variable_counter[FIELD]

        # was {
        self.tokenizer.advance()
        # now var declarations or statements

        # sum up what every "var" line declares
        local_count = 0
        while CompilationEngine.is_var_declare(
                self.tokenizer.get_current_token()[1]):
            local_count += self.compile_var_declaration()

        # function declare line
        self.VMWriter.write_function(subroutine_name, local_count)

        if subroutine_type == CONSTRUCTOR:
            # push the size, call the allocator, pop result to pointer 0
            self.VMWriter.write_push(CONSTANT, field_count)
            self.VMWriter.write_call(ALLOCATION_METHOD, ONE_NUM)
            self.VMWriter.write_pop(POINTER, ZERO_NUM)
        elif subroutine_type == METHOD:
            # push argument 0, pop pointer 0
            self.VMWriter.write_push(ARGUMENT, ZERO_NUM)
            self.VMWriter.write_pop(POINTER, ZERO_NUM)

        # subroutine statements
        self.compile_statements()

        # move one token on after the statements
        self.tokenizer.advance()

    def compile_var_declaration(self) -> int:
        """
        compiles one local variable declaration line
        ------------------------
        var type varName (, varName)* ;
        ------------------------
        every name is defined in the symbol table
        :return: amount of variables declared by the line
        """
        # was var
        kind = self.tokenizer.get_current_token()[1]
        self.tokenizer.advance()
        # now type

        # was type, shared by all names of the line
        type_var = self.tokenizer.get_current_token()[1]
        self.tokenizer.advance()
        # now first name

        declared = 0
        while True:
            # was name
            name = self.tokenizer.get_current_token()[1]
            self.tokenizer.advance()
            # now , or ;

            self.symbol_table.define(name, type_var, kind)
            declared += 1

            if self.tokenizer.current_word != COMMA:
                break

            # was ,
            self.tokenizer.advance()
            # now next name

        # was ;
        self.tokenizer.advance()
        # now next line
        return declared

    def compile_parameter_list(self) -> int:
        """
        compiles a parameter list, which may be empty
        ------------------------
        ( (type varName (, type varName)*)? )
        ------------------------
        starts on the opening "(" and defines every parameter
        as an argument in the symbol table
        :return: amount of parameters in the list
        """
        tokenizer = self.tokenizer

        # was (
        tokenizer.advance()
        # now first type or )

        if tokenizer.get_current_token()[1] == END_OF_PARAM_LIST:
            # empty list
            return 0

        param_count = 0
        while True:
            # was type
            var_type = tokenizer.get_current_token()[1]
            tokenizer.advance()

            # was name
            name = tokenizer.get_current_token()[1]
            tokenizer.advance()
            # now , or )

            self.symbol_table.define(name, var_type, ARGUMENT)
            param_count += 1

            if tokenizer.get_current_token()[1] != COMMA:
                break

            # was ,
            tokenizer.advance()
            # now next type

        return param_count

    def compile_statements(self) -> None:
        """
        compiles a sequence of statements
        not including the enclosing {}
        the first statement is taken from the current token, every
        following one from a peek at the next token
        :return: None
        """
        # one compile method per statement keyword
        handlers = {
            LET: self.compile_let,
            IF: self.compile_if,
            WHILE: self.compile_while,
            DO: self.compile_do,
            RETURN: self.compile_return,
        }

        keyword = self.tokenizer.get_current_token()[1]
        if keyword == END_OF_CLASS:
            # nothing to compile
            return

        while CompilationEngine.is_statement(keyword):
            handlers[keyword]()
            # the statement was compiled inside, look at what follows
            keyword = self.tokenizer.peek_at_next_token()[1]

    def compile_do(self) -> None:
        """
        compiles a do statement
        ------------------------
        do subroutineName ( expressionList ) ;
        do (className | varName) . subroutineName ( expressionList ) ;
        ------------------------
        :return: None
        """
        # the keyword may only have been peeked: step onto it first
        if self.tokenizer.get_current_token()[1] != DO:
            self.tokenizer.advance()

        # was do
        self.tokenizer.advance()
        # now subroutine name, or the name in front of the dot

        callee = self.tokenizer.get_current_token()[1]

        # a name that is not directly followed by ( is qualified:
        # move on one token before compiling the call
        if self.tokenizer.peek_at_next_token()[1] != START_OF_PARAM_LIST:
            self.tokenizer.advance()

        self.compile_call(callee)

        # now comes ;
        self.tokenizer.advance()

        # pop temp 0 after the call
        self.VMWriter.write_pop(TEMP, ZERO_NUM)

    def compile_let(self) -> None:
        """
        compiles a let statement
        --------------------
        let "var_name" = "expression" ;
        let "var_name" [ "expression" ] = "expression" ;
        --------------------
        :return: None
        """
        tokenizer = self.tokenizer

        # the keyword may only have been peeked: step onto it first
        if tokenizer.get_current_token()[1] != LET:
            tokenizer.advance()

        # was let
        tokenizer.advance()
        # now var name
        target_name = tokenizer.get_current_token()[1]

        # was var name
        tokenizer.advance()
        # now = or [

        is_array_target = tokenizer.get_current_token()[1] == ARRAY_OPENER
        if is_array_target:
            # pushes base + index of the element that is assigned
            self.calculate_memory_location(target_name)

        # were on =
        tokenizer.advance()
        # now on expression

        self.compile_expression()

        # after the expression comes ;
        tokenizer.advance()

        if is_array_target:
            # pop temp 0, pop pointer 1, push temp 0, pop that 0
            self.VMWriter.write_pop(TEMP, ZERO_NUM)
            self.VMWriter.write_pop(POINTER, ONE_NUM)
            self.VMWriter.write_push(TEMP, ZERO_NUM)
            self.VMWriter.write_pop(THAT, ZERO_NUM)
            return

        # plain variable: pop into its segment and index
        entry = self.get_variable_of_table(target_name)
        segment = SymbolTable.get_segment(entry[KIND])
        self.VMWriter.write_pop(segment, entry[INDEX])

    def calculate_memory_location(self, var_name):
        """
        compiles the index expression of an array element and writes
        the code that adds it to the pushed array variable
        called while the tokenizer is on the opening [
        :param var_name: name of the array variable
        :return: None
        """
        entry = self.get_variable_of_table(var_name)
        segment = SymbolTable.get_segment(entry[KIND])
        index = entry[INDEX]

        # was [
        self.tokenizer.advance()
        # now expression inside the brackets

        self.compile_expression()

        # push the variable itself, then add it to the index value
        self.VMWriter.write_push(segment, index)
        self.VMWriter.write_arithmetic(ADD)

        # two steps forward: first onto ], then onto what follows it
        for _ in range(2):
            self.tokenizer.advance()

    def compile_while(self):
        """
        compiles a while statement
        --------------------
        while ( "expression" )
        { "statements" }
        --------------------
        written as: label L1, condition, not, if-goto L2,
        statements, goto L1, label L2
        :return: None
        """
        tokenizer = self.tokenizer
        writer = self.VMWriter

        # the keyword may only have been peeked: step onto it first
        if tokenizer.get_current_token()[1] != WHILE:
            tokenizer.advance()

        # label L1
        loop_label = self.label_generator()
        writer.write_label(loop_label)

        # while
        tokenizer.advance()
        # (
        tokenizer.advance()

        # condition of the loop
        self.compile_expression()

        # ~(cond)
        writer.write_arithmetic(BINARY_DICT["~"])

        # )
        tokenizer.advance()

        # if-goto L2
        exit_label = self.label_generator()
        writer.write_if(exit_label)

        # {
        tokenizer.advance()
        # statement
        tokenizer.advance()

        self.compile_statements()

        # goto L1
        writer.write_goto(loop_label)
        # label L2
        writer.write_label(exit_label)

        # }
        tokenizer.advance()

    def compile_return(self) -> None:
        """
        compiles a return statement
        --------------------
        return "expression"? ;
        --------------------
        a return without a value pushes constant 0 before returning
        :return: None
        """
        # the keyword may only have been peeked: step onto it first
        if self.tokenizer.get_current_token()[1] != RETURN:
            self.tokenizer.advance()

        returns_nothing = (
            self.tokenizer.peek_at_next_token()[1] == COMMA_DOT)

        # move onto ; or onto the first token of the returned value
        self.tokenizer.advance()

        if returns_nothing:
            self.VMWriter.write_push(CONSTANT, ZERO_NUM)
        else:
            self.compile_expression()

        self.VMWriter.write_return()

        if not returns_nothing:
            # ;
            self.tokenizer.advance()

    def compile_if(self):
        """
        compiles an if statement
        possibly with a trailing else clause
        --------------------
        if ( "expression" )
        { "statements" }
        - might be
        else { "statements" }
        --------------------
        written as: condition, not, if-goto L1, statements 1, goto L2,
        label L1, statements 2 (if any), label L2
        :return: None
        """
        tokenizer = self.tokenizer
        writer = self.VMWriter

        # the keyword may only have been peeked: step onto it first
        if tokenizer.get_current_token()[1] != IF:
            tokenizer.advance()

        # L1 opens the else part, L2 closes the whole statement
        else_label = self.label_generator()
        end_label = self.label_generator()

        # was if
        tokenizer.advance()

        # condition
        self.compile_expression()

        # ~(cond)
        writer.write_arithmetic(BINARY_DICT["~"])

        tokenizer.advance()

        # if-goto L1
        writer.write_if(else_label)

        tokenizer.advance()

        # VM code for statements 1
        self.compile_statements()

        # goto L2
        writer.write_goto(end_label)

        tokenizer.advance()

        # an else may be the current token or the next one
        current_token = tokenizer.get_current_token()[1]
        next_token = tokenizer.peek_at_next_token()[1]

        # label L1
        writer.write_label(else_label)

        if next_token == ELSE or current_token == ELSE:
            if next_token == ELSE:
                # step onto the else keyword
                tokenizer.advance()
            # two steps forward before statements 2
            tokenizer.advance()
            tokenizer.advance()

            self.compile_statements()

            tokenizer.advance()

        # label L2
        writer.write_label(end_label)

    def compile_expression(self) -> None:
        """
        compiles an expression
        --------------------
        term (binary_op term)*
        --------------------
        the command of every operator is written after its right hand
        term, so operators are applied from left to right
        :return: None
        """
        # first term
        self.compile_term()

        while True:
            operator = self.tokenizer.peek_at_next_token()[1]
            if operator not in BINARY_OPERATORS:
                break

            # onto the operator
            self.tokenizer.advance()
            # onto the next term
            self.tokenizer.advance()

            self.compile_term()

            self.VMWriter.write_arithmetic(BINARY_DICT[operator])

    def compile_term(self) -> None:
        """
        compiles a term.
        handled in this order: integer constant, string constant,
        keyword constant, unary operator, ( expression ), and last an
        identifier, where the next token tells apart
        - an array entry: [
        - a subroutine call: (
        - a call through a class or variable name: .
        - a simple variable: anything else
        :return: None
        """
        token = self.tokenizer.get_current_token()
        token_type, token_string = token[0], token[1]

        # integerConstant
        if token_type == JackTokenizer.INT_TYPE:
            self.VMWriter.write_push(CONSTANT, token_string)
            return

        # stringConstant
        if token_type == JackTokenizer.STRING_TYPE:
            self.construct_string(token_string)
            return

        # keywordConstant
        if token_type == JackTokenizer.KEYWORD_TYPE:
            if token_string == TRUE:
                # push constant 0 followed by the "~" command
                self.VMWriter.write_push(CONSTANT, ZERO_NUM)
                self.VMWriter.write_arithmetic(BINARY_DICT["~"])
            elif token_string == FALSE:
                self.VMWriter.write_push(CONSTANT, ZERO_NUM)
            elif token_string == THIS:
                self.VMWriter.write_push(POINTER, ZERO_NUM)
            elif token_string == NULL:
                self.VMWriter.write_push(CONSTANT, ZERO_NUM)
            return

        # unaryOperator {- , ~}
        if token_string in UNARY_OPERATORS:
            # a unary minus is looked up under "!" instead of "-"
            op_key = "!" if token_string == "-" else token_string
            op = BINARY_DICT[op_key]

            self.tokenizer.advance()

            # the operand first, the operator command after it
            self.compile_term()
            self.VMWriter.write_arithmetic(op)
            return

        # ( -> some expression -> )
        if token_string == START_OF_PARAM_LIST:
            # (
            self.tokenizer.advance()
            self.compile_expression()
            # )
            self.tokenizer.advance()
            return

        # an identifier: the token after it decides what it is
        follower = self.tokenizer.peek_at_next_token()[1]
        if follower == ARRAY_OPENER:
            self.tokenizer.advance()
            self.array_variable(token_string)
        elif follower == START_OF_PARAM_LIST:
            # subroutine call immediately
            self.compile_call(token_string)
        elif follower == DOT:
            # onto the dot, the call takes over from there
            self.tokenizer.advance()
            self.compile_call(token_string)
        else:
            self.simple_variable(token_string)

    def simple_variable(self, var_name) -> None:
        """
        writes the push of a plain variable
        :param var_name: name of the variable that is pushed
        :return: None
        """
        entry = self.get_variable_of_table(var_name)
        segment = SymbolTable.get_segment(entry[KIND])
        self.VMWriter.write_push(segment, entry[INDEX])

    def array_variable(self, var_name):
        """
        writes the code that reads one array element
        called while the tokenizer is on the opening [
        :param var_name: name of the array variable
        :return: None
        """
        entry = self.get_variable_of_table(var_name)
        kind, index = entry[KIND], entry[INDEX]
        segment = SymbolTable.get_segment(kind)
        writer = self.VMWriter

        # was [
        self.tokenizer.advance()

        # expression inside []
        self.compile_expression()

        # push the array variable and add it to the index value
        writer.write_push(segment, index)
        writer.write_arithmetic(ADD)
        # pop pointer 1, then push that 0
        writer.write_pop(POINTER, ONE_NUM)
        writer.write_push(THAT, ZERO_NUM)

        # closing ]
        self.tokenizer.advance()

    def compile_expression_list(self) -> int:
        """
        compiles a comma separated list of expressions,
        which may be empty
        starts on the opening ( of the list
        :return: amount of expressions
        """
        # was (
        self.tokenizer.advance()
        # now ) or first argument

        if self.tokenizer.get_current_token()[1] == END_OF_PARAM_LIST:
            # empty list
            return 0

        arguments_count = 0
        while True:
            # one argument
            self.compile_expression()
            arguments_count += 1

            # step past the end of the expression
            self.tokenizer.advance()

            if self.tokenizer.get_current_token()[1] != COMMA:
                break

            # was ,
            self.tokenizer.advance()
            # now next argument

        return arguments_count

    def label_generator(self) -> str:
        """
        helper method
        builds a label from the current label counter and
        increases the counter by one
        :return: str of new label
        """
        number = self.label_counter
        label = LABEL + str(number)
        self.label_counter = number + 1
        return label

    def construct_string(self, token_string):
        """
        writes the code that builds a string constant
        :param token_string: the string token; its first and last
        char are dropped before it is used
        :return: None
        """
        text = token_string[1:-1]

        # allocation call with the length of the text as its argument
        self.VMWriter.write_push(CONSTANT, len(text))
        self.VMWriter.write_call(STRING_ALLOC_METHOD, ONE_NUM)

        # one appending call per char, with the code point from ord()
        for code_point in map(ord, text):
            self.VMWriter.write_push(CONSTANT, code_point)
            self.VMWriter.write_call(STRING_APPENDING, TWO_NUM)

    def compile_call(self, rout_or_class_name) -> None:
        """
        method to compile a subroutine call
        - name found in the symbol table: the variable is pushed and its
          type is used in place of the name
        - current token is a dot: call of name.subroutineName
        - otherwise: call of a subroutine of the current class,
          with pointer 0 pushed first
        :param rout_or_class_name: name of class, variable or subroutine
        :return: none
        """
        subroutine_type = None

        entry = self.get_variable_of_table(rout_or_class_name)
        if entry is not None:
            # a known variable: call through its type and push it
            object_type = entry[TYPE]
            rout_or_class_name = object_type
            subroutine_type = object_type
            index = entry[INDEX]
            segment = SymbolTable.get_segment(entry[KIND])
            self.VMWriter.write_push(segment, index)

        # . or subroutine name
        if self.tokenizer.get_current_token()[1] == DOT:
            # was .
            self.tokenizer.advance()
            # now subroutine name

            # Class.Subroutine
            full_name = (rout_or_class_name + DOT
                         + self.tokenizer.get_current_token()[1])
        else:
            # a subroutine name on its own
            self.VMWriter.write_push(POINTER, ZERO_NUM)
            full_name = self.current_class_name + DOT + rout_or_class_name
            subroutine_type = METHOD

        # one extra argument unless the type is unknown or void
        if subroutine_type is None or subroutine_type == VOID:
            arguments = 0
        else:
            arguments = 1

        # was subroutine name
        self.tokenizer.advance()
        # now (

        arguments += self.compile_expression_list()

        # call subroutine_name arguments
        self.VMWriter.write_call(full_name, arguments)

    def get_variable_of_table(self, var_name):
        """
        method to get variable of one of tables
        :param var_name: var name to get
        :return: dict of variable
        """
        variable = None
        # if in both
        if (var_name in self.symbol_table.variable_table.keys()) & \
                (var_name in self.symbol_table.subroutine_table.keys()):
            variable = self.symbol_table.subroutine_table[var_name]
        elif var_name in self.symbol_table.variable_table.keys():
            variable = self.symbol_table.variable_table[var_name]
        elif var_name in self.symbol_table.subroutine_table.keys():
            variable = self.symbol_table.subroutine_table[var_name]
        return variable
Esempio n. 14
0
class CompilationEngine(object):
    def __init__(self, inputfile, outputfile):
        """
        Set up the engine state and open the underlying resources.

        :param inputfile: Jack source, handed to ``create_buffer``
        :param outputfile: output target, handed to ``create_buffer``; the
            VM writer path is derived from it in ``_init``
        """
        # where the input comes from and where the output goes
        self._inputfile = inputfile
        self._outputfile = outputfile

        # element bookkeeping: tree root and the stack of open elements
        self._root = None
        self._cur_root = []
        # stack of running argument counts, one entry per call being compiled
        self._n_args = []

        # state of the class / subroutine currently being compiled
        self.class_name = None
        self.return_type = None
        self._label_cnt = 0

        # placeholders, both are replaced by _init() right below
        self._tokenizer: JackTokenizer = None
        self.vm_writer = None  # type:VMWriter
        self._init()

        self.symbol = SymbolTable()
        self.vm_writer.set_engine(self)
        self.method_type = None

    def line_num(self):
        return self._tokenizer.line

    def _init(self):
        """
        Open the input/output buffers and build the writer and tokenizer.

        The VM writer receives the output file name with its last four
        characters replaced by ``.vm``.
        """
        self._inputbuf = self.create_buffer(self._inputfile)
        self._outputbuf = self.create_buffer(self._outputfile, mode="w+")
        vm_path = self._outputfile[:-4] + ".vm"
        self.vm_writer = VMWriter(vm_path)
        self._tokenizer = JackTokenizer(self._inputbuf)

    def create_buffer(self, fn, mode='r'):

        if isinstance(fn, str) or isinstance(fn, unicode):
            return open(fn, mode)
        elif isinstance(fn, file) or isinstance(fn, IOBase):
            return fn
        else:
            raise ValueError("file object show file or readable")

    def compile_class(self):
        """
        Compile a whole class: ``class`` name ``{`` declarations ``}``.

        Once the class header has been read, the element tree is written to
        the output buffer and the VM writer and output buffer are closed,
        whether or not compiling the class body succeeds.
        """
        parent = self._set_parent("class")
        self._root = parent
        self._advance()
        self._pop_required(parent, TokenType.keyword, KeywordType.CLASS)
        # remember the class name before the identifier is consumed
        self.class_name = self._token()[1]
        self._pop_required(parent, TokenType.identifier)
        self._pop_required(parent, TokenType.symbol, "{")

        try:
            while self._is_class_var():
                self.compile_class_var_desc()

            while self._is_subroutine():
                self.compile_subroutine()
            self._pop_required(parent, TokenType.symbol, "}")
            print(self.symbol)
        finally:
            try:
                # tostring(...).decode() already yields text, so the former
                # Python 2 ``unicode()`` wrapper is not needed
                self._outputbuf.write(
                    et.tostring(self._root, pretty_print=True,
                                method="c14n2").decode("utf-8"))
            finally:
                try:
                    self.vm_writer.close()
                finally:
                    # used to sit after the ``finally`` block and was
                    # therefore skipped whenever anything above raised
                    self._outputbuf.close()

    def _required_type(self, token_type, val=None):
        tp, tv = self._token()
        if token_type != tp or (
            (tp == TokenType.keyword or tp == TokenType.symbol) and
            (val != tv)):
            raise ValueError("token must be %s,%s" % (token_type, val))
        return tp, tv

    def compile_class_var_desc(self):
        """
        Compile one class variable declaration up to and including ``;``.

        Every name in the declaration is defined in the symbol table with
        the kind and type read from the first two tokens.
        """
        node = self._set_parent("classVarDec")
        # could be refined with checks of the variable type and of the
        # identifier's validity
        kind = self.get_kind()
        node.append(self._build_element())
        self._advance()
        var_type = self.get_type()
        node.append(self._build_element())
        self._advance()

        while not self.is_token(TokenType.symbol, ";"):
            node.append(self._build_element())
            is_separator = (self._token()[1] == ","
                            or self._token()[1] == ";")
            if not is_separator:
                self.symbol.define(self._token()[1], var_type, kind)
            self._advance()
        # the terminating ";"
        node.append(self._build_element())
        self._advance()
        self._remove_parent()

    def get_kind(self):
        """
        Return the value of the current token as a variable kind.

        ``KeywordType`` members are converted to their lower-cased name; any
        other value is returned unchanged.
        """
        value = self._token()[1]
        return value.name.lower() if isinstance(value, KeywordType) else value

    def get_type(self):
        """
        Return the value of the current token as a type name.

        ``KeywordType`` members are converted to their lower-cased name; any
        other value is returned unchanged.
        """
        value = self._token()[1]
        if not isinstance(value, KeywordType):
            return value
        return value.name.lower()

    def compile_subroutine(self):
        """
        Compile one subroutine declaration.

        Resets the subroutine scope of the symbol table, records the
        subroutine kind and return type on the engine, registers the
        parameters and compiles the body.
        """
        print(self.symbol)
        self.symbol.start_subroutine()
        parent = self._set_parent("subroutineDec")

        # subroutine kind, return type and name are three consecutive tokens
        self.method_type = method_type = self._token()[1]
        self._advance()
        self.return_type = self._token()[1]
        self._advance()
        function_name = self._token()[1]
        self._advance()

        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_parameter_list()
        self._pop_required(parent, TokenType.symbol, ")")

        full_name = "{}.{}".format(self.class_name, function_name)
        self._compile_body(full_name, method_type)
        self._remove_parent()
        self.vm_writer.write_comment("end function %s" % function_name)
        self.vm_writer.write_comment("")

    def _compile_body(self, full_name, method_type):
        """
        Compile a subroutine body: ``{`` var declarations, statements ``}``.

        :param full_name: name emitted in the VM ``function`` command
        :param method_type: keyword the subroutine was declared with
        """
        parent = self._set_parent("subroutineBody")
        self._pop_required(parent, TokenType.symbol, "{")
        while self._is_var_desc():
            self.compile_var_desc()

        # the number of locals is only known after all var declarations
        n_locals = self.symbol.var_count("var")
        n_fields = self.symbol.var_count("field")
        self.vm_writer.write_function(full_name, n_locals)

        if method_type == KeywordType.METHOD:
            # member method: set this = argument 0
            self.vm_writer.write_push(SEG_ARG, "0")
            self.vm_writer.write_pop(SEG_POINTER, "0")
        elif method_type == KeywordType.CONSTRUCTOR:
            # constructor: allocate memory for the object's fields
            self.vm_writer.write_push(SEG_CONSTANT, n_fields)
            self.vm_writer.write_call("Memory.alloc", "1")
            self.vm_writer.write_pop(SEG_POINTER, "0")

        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        self._remove_parent()

    def _remove_parent(self):
        self._cur_root.pop()

    def compile_parameter_list(self):
        """
        Read ``type name`` pairs up to (not including) the closing ``)``.

        Each parameter is defined in the symbol table with kind ``arg``.
        """
        while not self.is_token(TokenType.symbol, ")"):
            param_type = self.get_type()
            self._advance()
            param_name = self._token()[1]
            self.symbol.define(param_name, param_type, "arg")
            self._advance()
            # step over the separator between two parameters
            if self.is_token(TokenType.symbol, ","):
                self._advance()

    def compile_var_desc(self):
        """
        Compile one ``var`` declaration up to and including ``;``.

        Every declared name is defined in the symbol table with kind
        ``var`` and the type read right after the ``var`` keyword.
        """
        node = self._set_parent("varDec")
        self._pop_required(node, TokenType.keyword, KeywordType.VAR)
        var_type = self.get_type()
        node.append(self._build_element())
        self._advance()

        while not self.is_token(TokenType.symbol, ";"):
            is_separator = (self.is_token(TokenType.symbol, ",")
                            or self.is_token(TokenType.symbol, ";"))
            if not is_separator:
                self.symbol.define(self._token()[1], var_type, "var")
            self._advance()
        self._pop_required(node, TokenType.symbol, ";")
        self._remove_parent()

    def compile_statements(self):

        self._set_parent("statements")

        while self._is_statement():
            if self.is_let_statement():
                self.compile_let()
            if self.is_do_statement():
                self.compile_do()
            if self.is_return_statement():
                self.compile_return()
            if self.is_if_statement():
                self.compile_if()
                continue
            if self.is_while_statement():
                self.compile_while()
                continue
        self._remove_parent()

    def compile_do(self):
        """
        Compile ``do`` subroutineCall ``;``.

        The value left by the call is discarded by popping it into
        ``temp 0``.
        """
        node = self._set_parent("doStatement")
        self._pop_required(node, TokenType.keyword, KeywordType.DO)
        leading = self._pop_required(node, TokenType.identifier)
        self.compile_call(*leading)
        self.vm_writer.write_pop(SEG_TEMP, 0)
        self._pop_required(node, TokenType.symbol, ";")
        self._remove_parent()

    def compile_call(self, typ1, id1):
        """
        Compile a subroutine call whose leading identifier is already consumed.

        Three shapes are handled:

        * ``var.name(...)``: ``id1`` is known to the symbol table; the
          variable is pushed as the receiver and counted as an argument
        * ``Name.name(...)``: ``id1`` is not in the symbol table and is
          used as the class part of the call target
        * ``name(...)``: call on the current object; ``pointer 0`` is pushed
          as the receiver

        :param typ1: token type of the leading identifier (not used)
        :param id1: the leading identifier
        """
        parent = None
        symbol_kind = self.symbol.kind_of(id1)
        n_args = 0
        _, id2 = self._token()
        if id2 == ".":
            if symbol_kind:
                # call on a variable: its declared type names the class
                function_type = self.symbol.type_of(id1)
                segments = {"arg": "argument", "static": "static",
                            "var": "local", "field": "this"}
                segment = segments.get(symbol_kind)
                if segment is not None:
                    index = self.symbol.index_of(id1)
                    if (symbol_kind == "arg"
                            and self.method_type == KeywordType.METHOD):
                        # Inside a method argument 0 holds ``this``, so
                        # declared parameters start at 1. get_var_seg_idx
                        # applies the same shift; it was missing here and
                        # the wrong argument was pushed as receiver.
                        index += 1
                    # push the receiver
                    self.vm_writer.write_push(segment, index)
                n_args += 1
            else:
                # not a variable: treat id1 as a class name
                function_type = id1
            self._advance()
            _, method_name = self._pop_required(parent, TokenType.identifier)
            full_name = "%s.%s" % (function_type, method_name)
        else:
            # bare name: call on the current object
            n_args += 1
            self.vm_writer.write_push("pointer", 0)
            function_type = self.class_name
            full_name = "%s.%s" % (function_type, id1)
        # compile_expression_list adds to the count on top of this stack
        self._n_args.append(n_args)
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_expression_list()
        self._pop_required(parent, TokenType.symbol, ")")
        n_args = self._n_args.pop(-1)
        self.vm_writer.write_call(full_name, n_args=n_args)

    def compile_let(self):
        """
        Compile ``let`` name (``[`` expression ``]``)? ``=`` expression ``;``.

        For an array target the element address is computed first and kept
        on the stack while the right-hand side is evaluated.
        """
        node = self._set_parent("letStatement")
        self._pop_required(node, TokenType.keyword, KeywordType.LET)
        _, target = self._pop_required(node, TokenType.identifier)
        segment, index = self.get_var_seg_idx(target)

        indexed = False
        if self.is_token(TokenType.symbol, "["):
            # the target may be an array element: address = base + index
            indexed = True
            self._advance()
            self.compile_expression()
            self.vm_writer.write_push(segment, index)
            self.vm_writer.write_arithmetic("+")
            self._pop_required(node, TokenType.symbol, "]")

        self._pop_required(node, TokenType.symbol, "=")
        self.compile_expression()
        if not indexed:
            self.vm_writer.write_pop(segment, index)
        else:
            # park the value, aim ``that`` at the element, then store
            self.vm_writer.write_pop(SEG_TEMP, "0")
            self.vm_writer.write_pop(SEG_POINTER, "1")
            self.vm_writer.write_push(SEG_TEMP, "0")
            self.vm_writer.write_pop(SEG_THAT, "0")
        self._pop_required(node, TokenType.symbol, ";")
        self._remove_parent()

    def compile_while(self):
        """
        Compile ``while (`` expression ``) {`` statements ``}``.

        The negated condition jumps to the exit label; the body ends with a
        jump back to the label in front of the condition.
        """
        writer = self.vm_writer
        writer.write_comment("start while")
        node = self._set_parent("whileStatement")
        self._pop_required(node, TokenType.keyword, KeywordType.WHILE)

        loop_label = self._get_label()
        writer.write_label(loop_label)
        exit_label = self._get_label()

        # condition
        self._pop_required(node, TokenType.symbol, "(")
        self.compile_expression()
        writer.write_arithmetic("~")
        self._pop_required(node, TokenType.symbol, ")")
        writer.write_if(exit_label)

        # body
        self._pop_required(node, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(node, TokenType.symbol, "}")
        writer.write_goto(loop_label)
        writer.write_label(exit_label)
        self._remove_parent()

        writer.write_comment("end while")

    def compile_return(self):
        """
        Compile ``return`` expression? ``;``.

        When the recorded return type is ``void`` a constant 0 is pushed
        before the VM ``return`` is written.
        """
        node = self._set_parent("returnStatement")
        self._pop_required(node, TokenType.keyword, KeywordType.RETURN)
        has_value = not self.is_token(TokenType.symbol, ";")
        if has_value:
            self.compile_expression()
        self._pop_required(node, TokenType.symbol, ";")
        returns_void = self.return_type == KeywordType.VOID
        if returns_void:
            self.vm_writer.write_push(SEG_CONSTANT, 0)
        self.vm_writer.write_return()
        self._remove_parent()

    def compile_if(self):
        """
        Compile ``if (`` expression ``) {`` statements ``}`` with an
        optional ``else {`` statements ``}``.

        The negated condition jumps to the else label; the true branch ends
        with a jump to the end label.
        """
        writer = self.vm_writer
        node = self._set_parent("ifStatement")
        writer.write_comment("compile if")
        self._pop_required(node, TokenType.keyword, KeywordType.IF)
        self._pop_required(node, TokenType.symbol, "(")
        else_label = self._get_label()
        end_label = self._get_label()

        # condition
        self.compile_expression()
        writer.write_arithmetic("~")
        writer.write_if(else_label)
        self._pop_required(node, TokenType.symbol, ")")

        # true branch
        self._pop_required(node, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(node, TokenType.symbol, "}")
        writer.write_goto(end_label)
        writer.write_label(else_label)

        # optional false branch
        if self.is_token(TokenType.keyword, KeywordType.ELSE):
            self._pop_required(node, TokenType.keyword, KeywordType.ELSE)
            self._pop_required(node, TokenType.symbol, "{")
            self.compile_statements()
            self._pop_required(node, TokenType.symbol, "}")
        writer.write_label(end_label)
        self._remove_parent()

        writer.write_comment(" if end")

    def compile_expression(self):
        """
        Compile ``term (op term)*`` strictly from left to right.

        An operator is written once the term to its right has been
        compiled; operators are applied in the order they were read.
        """
        self._set_parent("expression")
        pending_ops = []
        terms_seen = 0
        while not self._is_end():
            self.compile_term()
            if self._is_op(False):
                operator = self._token()[1]
                self._advance()
                pending_ops.append(operator)
            terms_seen += 1
            if terms_seen < 2:
                # a single term has nothing to combine with yet
                continue
            print(pending_ops)
            self.vm_writer.write_arithmetic(pending_ops.pop(0))

        self._remove_parent()

    def compile_term(self):
        """
        Compile one term and write the VM code that pushes its value.

        Tokens are consumed until a binary operator or an expression
        terminator is reached. Handled forms: a parenthesised expression, a
        unary operator applied to a term, an identifier (plain variable,
        array element or subroutine call) and integer, string and keyword
        constants.
        """
        parent = self._set_parent("term")
        # a leading "-" is unary, so it must not end the term as a binary op
        first = True
        while not self._is_op(first) and not self._is_end():
            first = False
            if self.is_token(TokenType.symbol, "("):
                # ( expression )
                self._advance()
                self.compile_expression()
                self._pop_required(parent, TokenType.symbol, ")")

            elif self._is_unary_op():
                # unary op: compile the operand first, then apply the op
                token, op = self._token()
                self._advance()
                op = "neg" if op == "-" else op
                self.compile_term()
                self.vm_writer.write_arithmetic(op)
                continue
            elif self.is_token(TokenType.identifier):
                tk, val = self._pop_required(parent, TokenType.identifier)
                if self.is_token(TokenType.symbol, "(") or self.is_token(
                        TokenType.symbol, "."):
                    # subroutine call
                    self.compile_call(tk, val)
                elif self.is_token(TokenType.symbol, "["):
                    # array element: index expression first, then the base
                    self._advance()
                    self.compile_expression()
                    seg, idx = self.get_var_seg_idx(val)
                    self.vm_writer.write_push(seg, idx)
                    # array: compute the element address directly and read
                    # it through that[0]
                    # fixme: access with a constant index such as a[0]
                    self.vm_writer.write_arithmetic("+")
                    self.vm_writer.write_pop(SEG_POINTER, "1")
                    self.vm_writer.write_push(SEG_THAT, "0")
                    self._pop_required(parent, TokenType.symbol, "]")
                else:
                    # plain variable
                    seg, idx = self.get_var_seg_idx(val)
                    self.vm_writer.write_push(seg, idx)
            else:
                tk, val = self._token()
                if self.is_token(TokenType.integerConstant):
                    self.vm_writer.write_push(SEG_CONSTANT, val)
                elif self.is_token(TokenType.keyword, KeywordType.TRUE):
                    # true is written as constant 0 followed by "~"
                    self.vm_writer.write_push(SEG_CONSTANT, "0")
                    self.vm_writer.write_arithmetic("~")
                elif self.is_token(TokenType.keyword, KeywordType.FALSE):
                    self.vm_writer.write_push(SEG_CONSTANT, "0")
                elif self.is_token(TokenType.keyword, KeywordType.NULL):
                    self.vm_writer.write_push(SEG_CONSTANT, "0")
                elif self.is_token(TokenType.keyword, KeywordType.THIS):
                    self.vm_writer.write_push(SEG_POINTER, "0")
                elif self.is_token(TokenType.stringConstant):
                    # String.new(length), then one appendChar per character
                    str_len = len(val)
                    self.vm_writer.write_push(SEG_CONSTANT, str(str_len))
                    self.vm_writer.write_call("String.new", "1")

                    for idx, x in enumerate(val):
                        self.vm_writer.write_push(SEG_CONSTANT, str(ord(x)))
                        self.vm_writer.write_call("String.appendChar", '2')

                # the token is consumed even if no branch above matched it
                self._advance()
        self._remove_parent()

    def _pop_required(self, parent, tk, val=None):
        tk, val = self.required(tk, val)
        self._advance()
        return tk, val

    def _is_op(self, first):
        """
        Tell whether the current token is a binary operator.

        ``-`` only counts as binary when it is not the first token of a
        term.

        :param first: True when the token is the first one of a term
        :return: True if the current token is a binary operator symbol
        """
        tk, val = self._token()
        if tk != TokenType.symbol:
            # ``a and b or c`` used to skip this check for "-": a non-symbol
            # token whose value is "-" was reported as an operator
            return False
        if val == '-':
            return not first
        # tuple membership: the former substring test on '+*/&|<>=' also
        # accepted "" and multi-character slices such as "<>"
        return val in ('+', '*', '/', '&', '|', '<', '>', '=')

    def _is_unary_op(self):
        """Return True when the current token is a symbol found in ``-~``."""
        kind, value = self._token()
        if not (kind == TokenType.symbol):
            return False
        return value in '-~'

    def compile_expression_list(self):
        """
        Compile comma separated expressions up to (not including) ``)``.

        The number of expressions is added to the entry on top of the
        ``_n_args`` stack.
        """
        node = self._set_parent("expressionList")
        counts = self._n_args
        total = counts[-1]
        while not self.is_token(TokenType.symbol, ")"):
            total += 1
            self.compile_expression()
            if self.is_token(TokenType.symbol, ","):
                self._pop_required(node, TokenType.symbol, ",")
        counts[-1] = total
        self._remove_parent()

    def build_identifier(self):
        """Return an ``identifier`` element whose text is the identifier."""
        node = et.Element("identifier")
        node.text = self._tokenizer.identifier()
        return node

    def build_keyword(self):
        """Return a ``keyword`` element holding the lower-cased keyword name."""
        node = et.Element("keyword")
        keyword = self._tokenizer.keyword()
        node.text = keyword.name.lower()
        return node

    def build_symbol(self):
        """Return a ``symbol`` element whose text is the current symbol."""
        node = et.Element("symbol")
        node.text = self._tokenizer.symbol()
        return node

    def _token(self):
        """
        Return the current token as a ``(type, value)`` pair.

        The value comes from the tokenizer accessor that belongs to the
        token type. ``(None, None)`` is returned for a type that is none of
        keyword, symbol, identifier, integerConstant or stringConstant.
        """
        tokenizer = self._tokenizer
        # ask for the type once instead of once per branch
        token_type = tokenizer.token_type()
        if token_type == TokenType.keyword:
            a, b = token_type, tokenizer.keyword()
        elif token_type == TokenType.symbol:
            a, b = token_type, tokenizer.symbol()
        elif token_type == TokenType.identifier:
            a, b = token_type, tokenizer.identifier()
        elif token_type == TokenType.integerConstant:
            a, b = token_type, tokenizer.intVal()
        elif token_type == TokenType.stringConstant:
            a, b = token_type, tokenizer.stringVal()
        else:
            a, b = None, None
        # NOTE(review): trace output on every peek at the token; kept so the
        # program's console output stays as it was
        print(a, b, tokenizer.line)
        return a, b

    def _advance(self):
        if self._tokenizer.has_more_tokens():
            self._tokenizer.advance()

    def required(self, token, val=None):
        return self._required_type(token, val)

    def _build_element(self):
        """
        Build an element for the current token.

        The tag is the name of the token type; the text is the token value,
        or the lower-cased member name when the value is a ``KeywordType``.
        """
        kind, value = self._token()
        node = et.Element(kind.name)
        node.text = value.name.lower() if isinstance(value, KeywordType) else value
        return node

    def _is_class_var(self):
        """Return True when the current token is ``field`` or ``static``."""
        starters = (KeywordType.FIELD, KeywordType.STATIC)
        return any(self.is_token(TokenType.keyword, keyword)
                   for keyword in starters)

    def is_token(self, token, val=None):
        t, v = self._token()
        if val is not None:
            return t == token and v == val
        else:
            return t == token

    def _get_parent(self):
        if len(self._cur_root) > 0:
            return self._cur_root[-1]
        else:
            return None

    def _set_parent(self, name):
        """
        Open a new element and make it the innermost one.

        The element is appended to the element that was innermost so far,
        if there is one.

        :param name: tag of the new element
        :return: the new element
        """
        enclosing = self._get_parent()
        node = et.Element(name)
        if enclosing is not None:
            enclosing.append(node)
        self._cur_root.append(node)
        return node

    def _is_subroutine(self):
        """
        Return True when the current token is ``function``, ``constructor``
        or ``method``.
        """
        starters = (KeywordType.FUNCTION, KeywordType.CONSTRUCTOR,
                    KeywordType.METHOD)
        return any(self.is_token(TokenType.keyword, keyword)
                   for keyword in starters)

    def _is_statement(self):
        if self.is_let_statement():
            return True
        if self.is_do_statement():
            return True
        if self.is_return_statement():
            return True
        if self.is_if_statement():
            return True
        if self.is_while_statement():
            return True

    def is_while_statement(self):
        """Return True when the current token is the ``while`` keyword."""
        expected = KeywordType.WHILE
        return self.is_token(TokenType.keyword, expected)

    def is_let_statement(self):
        """Return True when the current token is the ``let`` keyword."""
        expected = KeywordType.LET
        return self.is_token(TokenType.keyword, expected)

    def is_do_statement(self):
        """Return True when the current token is the ``do`` keyword."""
        expected = KeywordType.DO
        return self.is_token(TokenType.keyword, expected)

    def is_return_statement(self):
        """Return True when the current token is the ``return`` keyword."""
        expected = KeywordType.RETURN
        return self.is_token(TokenType.keyword, expected)

    def is_if_statement(self):
        """Return True when the current token is the ``if`` keyword."""
        expected = KeywordType.IF
        return self.is_token(TokenType.keyword, expected)

    def _is_var_desc(self):
        """Return True when the current token is the ``var`` keyword."""
        expected = KeywordType.VAR
        return self.is_token(TokenType.keyword, expected)

    def _is_end(self):
        """
        Tell whether the current token ends an expression.

        :return: True for the symbols ``;``, ``)``, ``,`` and ``]``
        """
        # ";" used to be tested twice; each terminator is now checked once
        return any(self.is_token(TokenType.symbol, closer)
                   for closer in (";", ")", ",", "]"))

    def get_var_seg_idx(self, val):
        kind = self.symbol.kind_of(val)
        idx = self.symbol.index_of(val)
        if kind == "static":
            return SEG_STATIC, idx
        elif kind == "var":
            return SEG_LOCAL, idx
        elif kind == "field":
            return SEG_THIS, idx
        elif kind == "arg":
            if self.method_type == KeywordType.METHOD:
                idx += 1
            return SEG_ARG, idx

    def _get_label(self):
        label = "label_%s" % self._label_cnt
        self._label_cnt += 1
        return label
Esempio n. 15
0
class CompilationEngine:
    """
    Effects the actual compilation output. Gets its input from a JackTokenizer and
    emits its parsed structure into an output file/stream. The output is generated by a series of compilexxx()
    routines, one for every syntactic element xxx of the Jack grammar. The contract between these routines is
    that each compilexxx() routine should read the syntactic construct xxx from the input, advance() the
    tokenizer exactly beyond xxx, and output the parsing of xxx. Thus, most of the compilexxx() may only be called if
    indeed xxx is the next syntactic element of the input.
    The module outputs to the output stream, the correspond VM code.
    """
    def __init__(self, input_stream, output_stream):
        """
        Build an engine that tokenizes ``input_stream`` and hands
        ``output_stream`` to the VM writer.

        Nothing is compiled here; call ``compile()`` afterwards.
        """
        # plain state first, collaborators afterwards
        self.__class_name = None
        self.__label_counter = 0
        self.__prefix = ""
        self.__tokenizer = JackTokenizer(input_stream)
        self.__writer = VMWriter(output_stream)
        self.__symbol_table = SymbolTable()

    def compile(self):
        """
        Compile the whole input by compiling its class.
        """
        compile_class = self.__compile_class
        compile_class()

    def __compile_class(self):
        """
        Compiles a complete class: the header, the class variable
        declarations, the subroutines and the closing brace.
        """
        check = self.__check_keyword_symbol
        check(KEYWORD_TYPE)  # "class"
        check(IDENTIFIER_TYPE)  # className
        # kept so that a method can register ``this`` with the class as type
        self.__class_name = self.__tokenizer.get_value()
        check(SYMBOL_TYPE)  # "{"

        while self.__compile_class_var_dec():
            pass
        while self.__compile_subroutine(False):
            self.__advance_tokenizer()

        check(SYMBOL_TYPE, make_advance=False)  # block closer "}"

    def __compile_class_var_dec(self, make_advance=True):
        """
        Compiles a static declaration or a field declaration
        :param: make_advance: passed on to the first token check. Default value is True
        :return: False if the first token check fails, True once the
        declaration has been read and its names were added to the symbol table
        """
        starts_declaration = self.__check_keyword_symbol(
            KEYWORD_TYPE, CLASS_VAR_DEC_KEYWORDS, make_advance)
        if not starts_declaration:
            # It is not a class var dec
            return False

        tokenizer = self.__tokenizer
        var_kind = tokenizer.get_value()  # static / field
        self.__check_type()
        var_type = tokenizer.get_value()

        # one name is mandatory, further names follow after ","
        while True:
            self.__check_keyword_symbol(IDENTIFIER_TYPE)  # varName
            self.__symbol_table.define(tokenizer.get_value(), var_type,
                                       var_kind)
            if not self.__check_keyword_symbol(
                    SYMBOL_TYPE, [ADDITIONAL_VAR_OPTIONAL_MARK]):
                break

        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # ";"
        return True

    def __compile_subroutine(self, make_advance=True):
        """
        Compiles a complete method, function, or constructor.
        :param: make_advance: passed on to the first token check. Default value is True
        :return: False if the first token check fails, True once the
        subroutine has been compiled
        """
        starts_subroutine = self.__check_keyword_symbol(
            KEYWORD_TYPE, SUBROUTINE_DEC_KEYWORDS, make_advance)
        if not starts_subroutine:
            # It is not a subroutine
            return False

        self.__symbol_table.start_subroutine()  # fresh subroutine scope

        declared_as = self.__tokenizer.get_value()
        is_constructor = declared_as == CONSTRUCTOR_DEC_KEYWORD
        if declared_as == METHOD_DEC_KEYWORD:
            # a method gets the object itself as an extra argument
            self.__symbol_table.define(THIS_CONSTANT, self.__class_name,
                                       ARG_SEGMENT_KEYWORD)

        if not self.__check_keyword_symbol(KEYWORD_TYPE):  # not void
            self.__check_type(False)
        self.__check_keyword_symbol(IDENTIFIER_TYPE)  # subroutineName
        func_name = self.__tokenizer.get_value()
        self.__check_keyword_symbol(SYMBOL_TYPE)  # "("

        self.__compile_parameter_list()
        # advance was made in the compile_parameter_list without use
        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # ")"
        self.__compile_subroutine_body(func_name, is_constructor)

        return True

    def __compile_subroutine_body(self, subroutine_name, is_constructor):
        """
        Compiles a subroutine body
        :param: subroutine_name: The name of the current subroutine (function/method/constructor's name)
        :param: is_constructor: True when the body belongs to a constructor
        """
        self.__check_keyword_symbol(SYMBOL_TYPE)  # '{'

        # __compile_var_dec() returns the size of each declaration and 0 once
        # there is none left; summing up to that sentinel counts the locals
        locals_count = sum(iter(self.__compile_var_dec, 0))

        writer = self.__writer
        writer.write_function(self.__class_name, subroutine_name,
                              locals_count)  # the function's title
        if is_constructor:
            # allocate one word per field and anchor ``this`` at the block
            field_count = self.__symbol_table.var_count(FIELD_SEGMENT_KEYWORD)
            writer.write_push(CONSTANT_SEGMENT, field_count)
            writer.write_call(ALLOC_FUNCTION, ALLOC_ARGS_NUM)
            writer.write_pop(POINTER_SEGMENT, THIS_POINTER_INDEX)
        elif self.__symbol_table.get_index_of(THIS_CONSTANT) is not None:
            # this was pushed for the method - pop it to this segment
            self.__push_var(THIS_CONSTANT)
            writer.write_pop(POINTER_SEGMENT, THIS_POINTER_INDEX)

        # compiles the statements of the subroutine
        self.__compile_statements()

        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # '}'

    def __compile_parameter_list(self):
        """
        Compiles a (possibly empty) parameter list, not including the enclosing "()".
        Every parameter is added to the symbol table as an argument.
        """
        if not self.__check_type():
            return

        tokenizer = self.__tokenizer
        while True:
            var_type = tokenizer.get_value()  # the parameter's type
            self.__check_keyword_symbol(IDENTIFIER_TYPE)  # varName
            self.__symbol_table.define(tokenizer.get_value(), var_type,
                                       ARG_SEGMENT_KEYWORD)
            # "," announces one more "type varName" pair
            if not self.__check_keyword_symbol(
                    SYMBOL_TYPE, [ADDITIONAL_VAR_OPTIONAL_MARK]):
                break
            self.__check_type()

    def __compile_var_dec(self):
        """
        Compiles one local variable declaration, if the next token starts one,
        and adds the declared names to the symbol table.
        :return: number of variables that were declared. If the token check
        for the beginning of a variable declaration fails, returns 0
        """
        if not self.__check_keyword_symbol(KEYWORD_TYPE,
                                           VAR_KEYWORDS):  # 'var'
            return 0

        self.__check_type()
        tokenizer = self.__tokenizer
        var_type = tokenizer.get_value()

        declared = 0
        while True:
            self.__check_keyword_symbol(IDENTIFIER_TYPE)  # variableName
            self.__symbol_table.define(tokenizer.get_value(), var_type,
                                       VAR_SEGMENT_KEYWORD)
            declared += 1
            # "," announces another name of the same type
            if not self.__check_keyword_symbol(
                    SYMBOL_TYPE, [ADDITIONAL_VAR_OPTIONAL_MARK]):
                break

        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # ';'
        return declared

    def __compile_statements(self):
        """
        compiles the statements inside a subroutine.
        Assumes the tokenizer is advanced for the first call.
        """
        # statement keyword -> routine compiling it; anything else that
        # passes the statement check is compiled as an if statement
        handlers = {
            LET_KEYWORD: self.__compile_let,
            DO_KEYWORD: self.__compile_do,
            WHILE_KEYWORD: self.__compile_while,
            RETURN_KEYWORD: self.__compile_return,
        }
        while self.__check_keyword_symbol(KEYWORD_TYPE, STATEMENTS_LIST,
                                          False):
            keyword = self.__tokenizer.get_value()
            handlers.get(keyword, self.__compile_if)()

    def __compile_do(self):
        """
        Compiles a do statement: the subroutine call is compiled and the value
        it leaves on the stack is popped into TEMP_SEGMENT 0.
        Assumes the tokenizer is positioned on 'do'.
        Advances the tokenizer once more at the end.
        """
        expect = self.__check_keyword_symbol

        expect(KEYWORD_TYPE, make_advance=False)  # 'do'
        expect(IDENTIFIER_TYPE)  # advance onto the identifier that opens the call
        self.__check_subroutine_call()
        expect(SYMBOL_TYPE, make_advance=False)  # ';'

        # the call's result is not used by a do statement
        self.__writer.write_pop(TEMP_SEGMENT, 0)
        self.__advance_tokenizer()

    def __compile_let(self):
        """
        Compiles a let statement, with either a plain variable or an array
        entry on the left side.
        Assumes the tokenizer is positioned on 'let'.
        Advances the tokenizer at the end.
        """
        expect = self.__check_keyword_symbol
        emit = self.__writer

        expect(KEYWORD_TYPE, make_advance=False)  # 'let'
        expect(IDENTIFIER_TYPE)  # varName
        target = self.__tokenizer.get_value()

        # advances past the name: the next token is either '[' or '='
        targets_array = expect(SYMBOL_TYPE, [OPEN_ARRAY_ACCESS_BRACKET])
        if targets_array:
            # leaves (base + index) on the stack
            self.__analyze_array_var(target)
            expect(SYMBOL_TYPE)  # '='

        # right side of the assignment
        self.__advance_tokenizer()
        self.__compile_expression()
        expect(SYMBOL_TYPE, make_advance=False)  # ';'
        self.__advance_tokenizer()

        if not targets_array:
            # plain variable: pop the value straight into it
            emit.write_pop(self.__symbol_table.get_kind_of(target),
                           self.__symbol_table.get_index_of(target))
            return

        # array entry: park the value, aim the 'that' pointer at the entry
        # address, then store the parked value there
        emit.write_pop(TEMP_SEGMENT, 0)
        emit.write_pop(POINTER_SEGMENT, THAT_POINTER_INDEX)
        emit.write_push(TEMP_SEGMENT, 0)
        emit.write_pop(THAT_SEGMENT, 0)

    def __compile_while(self):
        """
        Compiles a while statement. Two labels are taken from the label
        counter: one at the top of the loop and one just after it.
        Assumes the tokenizer is positioned on 'while'.
        Advances the tokenizer at the end.
        """
        expect = self.__check_keyword_symbol
        emit = self.__writer

        expect(KEYWORD_TYPE, make_advance=False)  # 'while'
        expect(SYMBOL_TYPE)  # '('

        # label marking the top of the loop
        loop_top = self.__label_counter
        self.__label_counter += 1
        emit.write_label(loop_top)

        # loop condition
        self.__advance_tokenizer()
        self.__compile_expression()
        expect(SYMBOL_TYPE, make_advance=False)  # ')'
        emit.write_arithmetic(NOT_OPERATOR, True)

        # the negated condition jumps out of the loop
        loop_exit = self.__label_counter
        self.__label_counter += 1
        emit.write_if(loop_exit)

        # loop body
        expect(SYMBOL_TYPE)  # '{'
        self.__advance_tokenizer()
        self.__compile_statements()
        expect(SYMBOL_TYPE, make_advance=False)  # '}'
        self.__advance_tokenizer()

        # jump back to re-evaluate the condition, then mark the exit point
        emit.write_goto(loop_top)
        emit.write_label(loop_exit)

    def __compile_return(self):
        """
        Compiles a return statement.
        Assumes the tokenizer is advanced for the first call.
        Advance the tokenizer at the end.

        Three forms are handled:
        - 'return;' pushes a junk constant 0 as the return value
        - 'return this;' while 'this' has no symbol-table entry pushes
          POINTER_SEGMENT THIS_POINTER_INDEX directly
        - anything else is compiled as an expression
        """
        self.__check_keyword_symbol(KEYWORD_TYPE,
                                    make_advance=False)  # 'return'

        # advances onto the token after 'return'
        if not self.__check_keyword_symbol(SYMBOL_TYPE, [END_LINE_MARK]):
            # The token type is checked as well as the value: a string constant
            # whose text happens to equal THIS_CONSTANT must be compiled as an
            # ordinary expression, not as the 'this' keyword.
            returns_this_keyword = self.__check_keyword_symbol(
                KEYWORD_TYPE, [THIS_CONSTANT], False)
            if returns_this_keyword and \
                    self.__symbol_table.get_type_of(THIS_CONSTANT) is None:
                # returning this in the constructor - push pointer 0
                self.__writer.write_push(POINTER_SEGMENT, THIS_POINTER_INDEX)
                self.__advance_tokenizer()
            else:
                # returning an expression
                self.__compile_expression()
            self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # ';'
        else:
            # return void - push a junk constant 0 for a return value
            self.__writer.write_push(CONSTANT_SEGMENT, 0)

        self.__advance_tokenizer()

        self.__writer.write_return()

    def __compile_if(self):
        """
        Compiles an if statement and its else clause when one follows.
        Assumes the tokenizer is positioned on 'if'.
        Advances the tokenizer at the end.
        """
        expect = self.__check_keyword_symbol
        emit = self.__writer

        expect(KEYWORD_TYPE, make_advance=False)  # 'if'
        expect(SYMBOL_TYPE)  # '('

        # condition
        self.__advance_tokenizer()
        self.__compile_expression()
        expect(SYMBOL_TYPE, make_advance=False)  # ')'
        emit.write_arithmetic(NOT_OPERATOR, True)

        # the negated condition jumps over the 'if' branch
        skip_label = self.__label_counter
        self.__label_counter += 1
        emit.write_if(skip_label)

        # 'if' branch
        expect(SYMBOL_TYPE)  # '{'
        self.__advance_tokenizer()
        self.__compile_statements()
        expect(SYMBOL_TYPE, make_advance=False)  # '}'

        # after the 'if' branch, jump over whatever follows the skip label
        done_label = self.__label_counter
        self.__label_counter += 1
        emit.write_goto(done_label)
        emit.write_label(skip_label)

        # this check advances past '}' whether or not an 'else' follows
        if expect(KEYWORD_TYPE, [ELSE_KEYWORD]):
            expect(SYMBOL_TYPE)  # '{'
            self.__advance_tokenizer()
            self.__compile_statements()
            expect(SYMBOL_TYPE, make_advance=False)  # '}'
            self.__advance_tokenizer()

        emit.write_label(done_label)

    def __compile_expression(self):
        """
        Compiles an expression: term (op term)*
        Operators are applied left to right in the order they appear; each
        one is written after the term that follows it.
        Assumes the tokenizer is positioned on the first token of the expression.
        Leaves the tokenizer on the first token after the expression.
        """
        self.__compile_term()

        while True:
            # stop as soon as the current token is not a binary operator
            if not self.__check_op(False):
                return
            operator = self.__tokenizer.get_value()
            self.__advance_tokenizer()
            self.__compile_term()
            self.__writer.write_arithmetic(operator)

    def __compile_term(self):
        """
        Compiles a single term. The alternatives are tried in this order:
        integer constant, string constant, keyword constant, parenthesised
        expression, unary operator applied to a term, and finally anything
        that starts with an identifier (subroutine call, array entry, variable).
        Assumes the tokenizer is positioned on the first token of the term.
        Leaves the tokenizer on the first token after the term.
        """
        tokenizer = self.__tokenizer
        emit = self.__writer
        expect = self.__check_keyword_symbol

        # integer constant
        if tokenizer.get_token_type() == INTEGER_CONST_TYPE:
            emit.write_push(CONSTANT_SEGMENT, int(tokenizer.get_value()))
            self.__advance_tokenizer()
            return

        # string constant
        # NOTE(review): membership test ('in'), unlike the '==' used for the
        # integer type; confirm what STRING_CONST_TYPE holds
        if tokenizer.get_token_type() in STRING_CONST_TYPE:
            self.__compile_string_constant()
            self.__advance_tokenizer()
            return

        # keyword constant
        if expect(KEYWORD_TYPE, KEYWORD_CONSTANT_LIST, False):
            constant = tokenizer.get_value()
            if constant == THIS_CONSTANT:
                emit.write_push(POINTER_SEGMENT, 0)  # push this
            elif constant == TRUE_CONSTANT:
                # constant 1 followed by the unary form of MINUS
                emit.write_push(CONSTANT_SEGMENT, 1)
                emit.write_arithmetic(MINUS, True)
            else:
                emit.write_push(CONSTANT_SEGMENT, 0)  # the remaining constants
            self.__advance_tokenizer()
            return

        # (expression)
        if expect(SYMBOL_TYPE, [OPEN_BRACKET], False):
            self.__advance_tokenizer()
            self.__compile_expression()
            expect(SYMBOL_TYPE, make_advance=False)  # ')'
            self.__advance_tokenizer()
            return

        # unaryOp term: the operator is written after its operand
        if self.__check_unary_op(False):
            unary_operator = tokenizer.get_value()
            self.__advance_tokenizer()
            self.__compile_term()
            emit.write_arithmetic(unary_operator, True)
            return

        # everything else starts with an identifier
        name = tokenizer.get_value()

        # subroutine call; when it is not one, the check has still moved the
        # tokenizer onto the token after the identifier
        if self.__check_subroutine_call():
            return

        if expect(SYMBOL_TYPE, [OPEN_ARRAY_ACCESS_BRACKET], False):
            # varName[expression]: compute the entry address, aim the 'that'
            # pointer at it and push the entry
            self.__analyze_array_var(name)
            emit.write_pop(POINTER_SEGMENT, THAT_POINTER_INDEX)
            emit.write_push(THAT_SEGMENT, 0)
            self.__advance_tokenizer()
        else:
            # plain varName
            self.__push_var(name)

    def __analyze_array_var(self, identifier_name):
        """
        Handles varName[expression]: pushes the variable, compiles the index
        expression and writes PLUS, leaving (varName + expression) on the stack.
        Assumes the tokenizer is positioned on the opening bracket; the
        tokenizer is not advanced past the token that follows the expression.
        :param identifier_name: the variable's name
        """
        self.__push_var(identifier_name)  # base
        self.__advance_tokenizer()  # step onto the index expression
        self.__compile_expression()  # index
        emit = self.__writer
        emit.write_arithmetic(PLUS)  # base + index
        self.__check_keyword_symbol(SYMBOL_TYPE, None, False)  # ']'

    def __compile_string_constant(self):
        """
        Compiles the string constant held by the current token: one call to
        STRING_CONSTRUCTOR with the string's length, followed by one call to
        STRING_APPEND per character.
        """
        text = self.__tokenizer.get_value()

        # each literal control character is turned back into its
        # two-character backslash form before the length is measured
        for control_char, spelled_out in (("\t", "\\t"), ("\n", "\\n"),
                                          ("\b", "\\b"), ("\r", "\\r")):
            text = text.replace(control_char, spelled_out)

        emit = self.__writer
        emit.write_push(CONSTANT_SEGMENT, len(text))
        emit.write_call(STRING_CONSTRUCTOR, STRING_CONSTRUCT_NUM_ARGS)
        for char_code in map(ord, text):
            emit.write_push(CONSTANT_SEGMENT, char_code)  # the character's code
            emit.write_call(STRING_APPEND, STRING_APPEND_NUM_ARGS)

    def __push_var(self, var_name):
        """
        Writes a push command for a variable, using the kind and index the
        symbol table reports for it.
        :param var_name: the variable name to push to the stack
        """
        table = self.__symbol_table
        segment = table.get_kind_of(var_name)
        index = table.get_index_of(var_name)
        self.__writer.write_push(segment, index)

    def __check_subroutine_call(self):
        """
        Compiles a subroutine call if the current identifier starts one.
        Always advances the tokenizer past the identifier, even when the
        answer is False. After a call was compiled, the tokenizer has been
        advanced once more past the end of the call.
        :return: true iff a subroutine call was found and compiled
        """
        expect = self.__check_keyword_symbol
        table = self.__symbol_table
        receiver = self.__tokenizer.get_value()

        if expect(SYMBOL_TYPE, [OPEN_BRACKET]):
            # name(...): a method of the class being compiled
            target = self.__class_name + CALL_CLASS_METHOD_MARK + receiver
            arg_count = 1  # the extra 'this' arg
            if table.get_index_of(THIS_CONSTANT) is not None:
                self.__push_var(THIS_CONSTANT)
            else:
                self.__writer.write_push(POINTER_SEGMENT, THIS_POINTER_INDEX)
        elif expect(SYMBOL_TYPE, [CALL_CLASS_METHOD_MARK], False):
            # receiver.name(...)
            if table.get_index_of(receiver) is not None:
                # the receiver is a known variable: call a method on its type
                # and pass the variable as the extra 'this' arg
                owner = table.get_type_of(receiver)
                arg_count = 1
                self.__push_var(receiver)
            else:
                # not a known variable: the receiver itself prefixes the call
                owner = receiver
                arg_count = 0

            self.__advance_tokenizer()
            target = owner + CALL_CLASS_METHOD_MARK + self.__tokenizer.get_value()
            expect(SYMBOL_TYPE)  # '('
        else:
            # neither form: not a subroutine call
            return False

        # push the arguments, then call
        arg_count += self.__compile_expression_list()
        expect(SYMBOL_TYPE, make_advance=False)  # ')'
        self.__writer.write_call(target, arg_count)

        self.__advance_tokenizer()
        return True

    def __compile_expression_list(self):
        """
        compiles an expression list (possibly empty), with the expressions
        separated by ADDITIONAL_VAR_OPTIONAL_MARK.
        Assumes the tokenizer is positioned on the token that opens the list
        and advances it once before looking for the first expression.
        Leaves the tokenizer on the first token after the last expression.
        :return: the number of expressions compiled
        """
        exp_counter = 0
        self.__advance_tokenizer()

        # The list is empty only when the current token is the closing-bracket
        # SYMBOL. Comparing the value alone would also match a string constant
        # whose text equals CLOSE_BRACKET and silently drop the argument.
        list_is_empty = self.__check_keyword_symbol(SYMBOL_TYPE,
                                                    [CLOSE_BRACKET], False)
        if not list_is_empty:
            exp_counter += 1
            # compiles the first expression
            self.__compile_expression()

            # checks for more expressions separated with the separator mark
            while self.__check_keyword_symbol(SYMBOL_TYPE,
                                              [ADDITIONAL_VAR_OPTIONAL_MARK],
                                              False):
                exp_counter += 1
                # steps over the separator
                self.__advance_tokenizer()
                # compiles the next expression
                self.__compile_expression()

        return exp_counter

    def __check_keyword_symbol(self,
                               token_type,
                               value_list=None,
                               make_advance=True):
        """
        Tests the current token's type and, optionally, its value.
        :param token_type: the type the current token must have
        :param value_list: the values the token may have; None accepts any value
        :param make_advance: when true, the tokenizer is advanced before the test
        :return: True if the token has the given type and an accepted value.
          False otherwise, including when an advance was requested but the
          tokenizer reports no more tokens (no advance happens in that case)
        """
        tokenizer = self.__tokenizer

        if make_advance:
            if not tokenizer.has_more_tokens():
                return False
            tokenizer.advance()

        if tokenizer.get_token_type() != token_type:
            return False

        return value_list is None or tokenizer.get_value() in value_list

    def __check_type(self, make_advance=True):
        """
        Tests whether the current token is a type: either a keyword from
        TYPE_LIST or any identifier.
        :param make_advance: when true, the tokenizer is advanced once before the test
        :return: true iff the current token is a type
        """
        # the second test never advances, so both look at the same token
        return (self.__check_keyword_symbol(KEYWORD_TYPE, TYPE_LIST,
                                            make_advance)
                or self.__check_keyword_symbol(IDENTIFIER_TYPE,
                                               make_advance=False))

    def __check_op(self, make_advance=True):
        """
        Tests the current token against the operators in OP_LIST.
        :param make_advance: when true, the tokenizer is advanced before the test
        :return: true iff the token is a symbol whose value appears in OP_LIST
        """
        is_operator = self.__check_keyword_symbol(
            token_type=SYMBOL_TYPE,
            value_list=OP_LIST,
            make_advance=make_advance)
        return is_operator

    def __check_unary_op(self, make_advance=True):
        """
        Tests the current token against the operators in UNARY_OP_LIST.
        :param make_advance: when true, the tokenizer is advanced before the test
        :return: true iff the token is a symbol whose value appears in UNARY_OP_LIST
        """
        is_unary_operator = self.__check_keyword_symbol(
            token_type=SYMBOL_TYPE,
            value_list=UNARY_OP_LIST,
            make_advance=make_advance)
        return is_unary_operator

    def __advance_tokenizer(self):
        """
        Moves the inner tokenizer to the next token, for the places where the
        input is only valid if another token exists.
        """
        tokenizer = self.__tokenizer
        # the answer is not inspected: the call is kept in front of every advance
        tokenizer.has_more_tokens()
        tokenizer.advance()
class CompilationEngine:

    def __init__(self, input_file, output_file):
        """
        Builds the tokenizer, symbol table and VM writer and resets the
        per-class and per-subroutine state. No compilation starts here.
        :param input_file: handed to JackTokenizer
        :param output_file: handed to VMWriter
        """
        # collaborators
        self.jack_tokenizer = JackTokenizer(input_file)
        self.symbol_table = SymbolTable()
        self.writer = VMWriter(output_file)

        # names and return type of what is currently being compiled
        self.class_name = ""
        self.subroutine_name = ""
        self.return_type = ""

        # counters used to number the if / while labels
        self.label_counter_if = 0
        self.label_counter_while = 0

        # argument count of the call currently being compiled
        self.num_args_called_function = 0
        self.is_unary = False

        # Jack binary operator -> VM command
        self.dic_arithmetic = {
            "+": "add",
            "-": "sub",
            "*": "call Math.multiply 2",
            "/": "call Math.divide 2",
            "&": "and",
            "|": "or",
            "<": "lt",
            ">": "gt",
            "=": "eq",
        }

    def compile_class(self):
        """
        Compiles a whole class: reads the class header, then dispatches every
        class-level declaration until the closing brace of the class.
        """
        tokenizer = self.jack_tokenizer

        # header: "class className {" - the identifier among these tokens is the name
        for _ in range(NUM_TOKENS_CLASS_DEC):
            tokenizer.has_more_tokens()
            tokenizer.advance()
            if tokenizer.token_type() == IDENTIFIER:
                self.class_name = tokenizer.identifier()

        # classVarDec* subroutineDec*
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            # Sampled once per iteration: the checks below use the type of the
            # token that opened this iteration, not of the token a sub-compiler
            # stopped on.
            opening_type = tokenizer.token_type()
            opens_with_keyword = opening_type == KEYWORD

            if opens_with_keyword and tokenizer.key_word() in ("static", "field"):
                self.compile_class_var_dec()
            if opens_with_keyword and tokenizer.key_word() in ("function", "method",
                                                               "constructor"):
                self.compile_subroutine()
            if opening_type == SYMBOL and tokenizer.symbol() == "}":
                break

    def compile_class_var_dec(self):
        """
        Compiles one class-level variable declaration and defines every
        declared name in the symbol table.
        Assumes the current token is the declaration keyword ("static" or "field").
        Stops on the ";" that ends the declaration.
        """
        tokenizer = self.jack_tokenizer

        # the declaration keyword doubles as the symbol kind
        kind = tokenizer.key_word()

        tokenizer.has_more_tokens()
        tokenizer.advance()
        # declared type: a keyword or a class name
        if tokenizer.token_type() == KEYWORD:
            var_type = tokenizer.key_word()
        else:
            var_type = tokenizer.identifier()

        # every identifier up to ";" is a declared name; other tokens are skipped
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            current_type = tokenizer.token_type()
            if current_type == IDENTIFIER:
                self.symbol_table.define(tokenizer.identifier(), var_type, kind)
            elif current_type == SYMBOL and tokenizer.symbol() == ";":
                break


    def compile_subroutine(self):
        """
        Compiles a complete constructor, function or method.
        Assumes the current token is the subroutine keyword.
        Stops on the "}" that closes the subroutine body.

        The function command is written only after all "var" declarations
        were read, because it needs the number of local variables. A body
        without statements is handled too: its closing "}" triggers the
        function command, so the scan cannot run on into the next subroutine.
        """
        # per-subroutine state
        self.symbol_table.start_subroutine()
        self.subroutine_name = ""
        self.return_type = ""
        self.label_counter_if = 0
        self.label_counter_while = 0

        tokenizer = self.jack_tokenizer

        # the curr token : "constructor" or "function" or "method"
        type_of_subroutine = tokenizer.key_word()
        tokenizer.has_more_tokens()
        tokenizer.advance()

        # the curr token : return type of the subroutine
        if tokenizer.token_type() == KEYWORD:
            self.return_type = tokenizer.key_word()
        else:
            self.return_type = tokenizer.identifier()
        tokenizer.has_more_tokens()
        tokenizer.advance()
        self.subroutine_name = tokenizer.identifier()

        while tokenizer.has_more_tokens():
            tokenizer.advance()
            if tokenizer.symbol() == "(":
                if type_of_subroutine == "method":
                    # a method's object is defined as the first argument
                    self.symbol_table.define(THIS, self.class_name, ARG)
                self.compile_parameter_list()
                # the curr token should be -  ")"
            if tokenizer.symbol() == '{':
                while tokenizer.has_more_tokens():
                    tokenizer.advance()
                    token_type = tokenizer.token_type()

                    if token_type == KEYWORD and tokenizer.key_word() == "var":
                        self.compile_var_dec()
                        continue

                    # the statements start at the first non-"var" keyword; an
                    # immediate "}" means the statement list is empty
                    at_first_statement = token_type == KEYWORD
                    at_body_end = (token_type == SYMBOL
                                   and tokenizer.symbol() == "}")
                    if at_first_statement or at_body_end:
                        self.writer.write_function(
                            self.class_name + "." + self.subroutine_name,
                            self.symbol_table.var_count(VAR))
                        if type_of_subroutine == "constructor":
                            # allocate one word per field and point "this" at the block
                            self.writer.write_push(CONST, self.symbol_table.var_count(FIELD))
                            self.writer.write_call("Memory.alloc", 1)
                            self.writer.write_pop("pointer", 0)
                        elif type_of_subroutine == "method":
                            # point "this" at the object passed as argument 0
                            self.writer.write_push(ARGUMENT, 0)
                            self.writer.write_pop("pointer", 0)
                        # returns at once when the curr token is already "}"
                        self.compile_statements()
                        # the curr token should be -  "}"
                        break
                break


    def compile_parameter_list(self):
        """
        Compiles a (possibly empty) parameter list and defines every parameter
        in the symbol table with kind ARG.
        Assumes the current token is the "(" that opens the list.
        Stops on the ")" that closes it.
        """
        tokenizer = self.jack_tokenizer

        while tokenizer.has_more_tokens():
            tokenizer.advance()
            current_type = tokenizer.token_type()

            if current_type in (KEYWORD, IDENTIFIER):
                # parameter type: a keyword (int, bool....) or a className
                if current_type == KEYWORD:
                    param_type = tokenizer.key_word()
                else:
                    param_type = tokenizer.identifier()
                # the parameter name follows its type
                tokenizer.has_more_tokens()
                tokenizer.advance()
                self.symbol_table.define(tokenizer.identifier(), param_type, ARG)
            elif current_type == SYMBOL and tokenizer.symbol() == ")":
                # end of parameter list
                break


    def compile_var_dec(self):
        """
        Compiles one local variable declaration and defines every declared
        name in the symbol table.
        Assumes the current token is the "var" keyword.
        Stops on the ";" that ends the declaration.
        """
        tokenizer = self.jack_tokenizer

        # should be "var" - the keyword itself is used as the symbol kind
        kind = tokenizer.key_word()

        tokenizer.has_more_tokens()
        tokenizer.advance()
        # declared type: a keyword or a class name
        reads_keyword_type = tokenizer.token_type() == KEYWORD
        var_type = tokenizer.key_word() if reads_keyword_type else tokenizer.identifier()

        # names up to the terminating ";"
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            current_type = tokenizer.token_type()
            if current_type == IDENTIFIER:
                self.symbol_table.define(tokenizer.identifier(), var_type, kind)
                continue
            if current_type == SYMBOL and tokenizer.symbol() == ";":
                break


    def compile_statements(self):
        """
        Compiles statements until the current token is the "}" that closes
        the enclosing block.
        Assumes the tokenizer is positioned on the first token of the block body.
        Stops on that closing "}".
        """
        tokenizer = self.jack_tokenizer
        # these compilers are followed by one more advance; compile_if is not,
        # because compile_if returns advanced
        advance_after = {
            "do": self.compile_do,
            "let": self.compile_let,
            "while": self.compile_while,
            "return": self.compile_return,
        }

        while True:
            if tokenizer.token_type() == KEYWORD:
                keyword = tokenizer.key_word()
                if keyword == "if":
                    self.compile_if()
                    continue
                compile_statement = advance_after.get(keyword)
                if compile_statement is not None:
                    compile_statement()
                    tokenizer.has_more_tokens()
                    tokenizer.advance()
                    continue
            if tokenizer.token_type() == SYMBOL and tokenizer.symbol() == "}":
                break


    def compile_do(self):
        """
        Compiles a do statement: compiles the subroutine call, pops the value
        it leaves on the stack into TEMP 0, then advances one token.
        """
        # the argument count is accumulated from zero for every call
        self.num_args_called_function = 0
        self.compile_subroutine_call()

        # the value returned by the call is not used
        self.writer.write_pop(TEMP, 0)

        tokenizer = self.jack_tokenizer
        tokenizer.has_more_tokens()
        tokenizer.advance()

    def compile_let(self):
        """
        Compiles a let statement: "let varName = expression;" or
        "let varName[expression] = expression;".
        Assumes the current token is the "let" keyword.
        Stops on the ";" that ends the statement.

        For an array target, the address of the entry stays on the stack while
        the right side is compiled, and "pointer 1" is set only after the
        value is parked in TEMP 0. Setting "pointer 1" before the right side
        would let a right side that re-targets "pointer 1" redirect the store.
        """
        tokenizer = self.jack_tokenizer
        is_array_target = False

        # the curr token - "let"
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            token_type = tokenizer.token_type()

            if token_type == IDENTIFIER:
                # the variable that is assigned to
                name = tokenizer.identifier()
                kind = self.symbol_table.kind_of(name)
                index = self.symbol_table.index_of(name)

            if token_type == SYMBOL:
                # there is an assignment to an array
                if tokenizer.symbol() == "[":
                    tokenizer.has_more_tokens()
                    tokenizer.advance()
                    #  handle  - [expression]
                    self.compile_expression()
                    # the curr token -  "]"
                    # leave (index + base) on the stack as the target address
                    self.writer.write_push(self.find_segment(kind), index)
                    self.writer.write_arithmetic("add")
                    is_array_target = True

                if tokenizer.symbol() == "=":
                    tokenizer.has_more_tokens()
                    tokenizer.advance()
                    # handle the = expression
                    self.compile_expression()
                    if is_array_target:
                        # stack: target address, value
                        self.writer.write_pop(TEMP, 0)
                        self.writer.write_pop("pointer", 1)
                        self.writer.write_push(TEMP, 0)
                        self.writer.write_pop(THAT, 0)
                    else:
                        self.writer.write_pop(self.find_segment(kind), index)

                # end of let statement
                if tokenizer.symbol() == ";":
                    break


    def compile_while(self):
        """
        Compiles a while statement, using a WHILE_EXP / WHILE_END label pair
        numbered from the while-label counter.
        Assumes the current token is the "while" keyword.
        Stops on the "}" that closes the loop body.
        """
        tokenizer = self.jack_tokenizer

        # reserve this loop's label number
        label_number = str(self.label_counter_while)
        self.label_counter_while += 1
        condition_label = "WHILE_EXP" + label_number
        exit_label = "WHILE_END" + label_number

        self.writer.write_label(condition_label)

        while tokenizer.has_more_tokens():
            tokenizer.advance()
            if tokenizer.token_type() != SYMBOL:
                continue

            if tokenizer.symbol() == "(":
                # loop condition; leave the loop when it does not hold
                tokenizer.has_more_tokens()
                tokenizer.advance()
                self.compile_expression()
                # the curr token - ")"
                self.writer.write_arithmetic("not")
                self.writer.write_if(exit_label)

            if tokenizer.symbol() == "{":
                # loop body, then back to the condition
                tokenizer.has_more_tokens()
                tokenizer.advance()
                self.compile_statements()
                # the curr token - "}"
                self.writer.write_go_to(condition_label)
                self.writer.write_label(exit_label)

            if tokenizer.symbol() == "}":
                break


    def compile_return(self):
        """
        Compiles a return statement. A bare "return;" pushes constant 0 as the
        return value; otherwise the returned expression is compiled.
        Assumes the current token is the "return" keyword.
        """
        tokenizer = self.jack_tokenizer
        tokenizer.has_more_tokens()
        tokenizer.advance()

        returns_nothing = (tokenizer.token_type() == SYMBOL
                           and tokenizer.symbol() == ";")
        if returns_nothing:
            self.writer.write_push(CONST, "0")
        else:
            # should return from "compile_expression" only with ";"
            self.compile_expression()

        self.writer.write_return()

    def compile_if(self):
        """
        Compiles an if statement and its else clause when one follows, using
        IF_TRUE / IF_FALSE / IF_END labels numbered from the if-label counter.
        Assumes the current token is the "if" keyword.
        Returns advanced: one token past the "}" that closes the statement.
        """
        tokenizer = self.jack_tokenizer

        # reserve this statement's label number
        label_number = str(self.label_counter_if)
        self.label_counter_if += 1
        true_label = "IF_TRUE" + label_number
        false_label = "IF_FALSE" + label_number
        end_label = "IF_END" + label_number

        # the curr token - "if"
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            if tokenizer.token_type() != SYMBOL:
                continue

            if tokenizer.symbol() == "(":
                # condition: jump to the true branch, otherwise to the false label
                tokenizer.has_more_tokens()
                tokenizer.advance()
                self.compile_expression()
                # the curr token - ")"
                self.writer.write_if(true_label)
                self.writer.write_go_to(false_label)

            # the tokenizer is asked again here: the branch above moved it
            if tokenizer.token_type() == SYMBOL and tokenizer.symbol() == "{":
                self.writer.write_label(true_label)
                tokenizer.has_more_tokens()
                tokenizer.advance()
                self.compile_statements()

            if tokenizer.token_type() == SYMBOL and tokenizer.symbol() == "}":
                break

        # look at the token after "}" to see whether an else clause follows
        tokenizer.has_more_tokens()
        tokenizer.advance()
        has_else = (tokenizer.token_type() == KEYWORD
                    and tokenizer.key_word() == "else")

        if not has_else:
            self.writer.write_label(false_label)
            return

        # the true branch skips the else block
        self.writer.write_go_to(end_label)
        self.writer.write_label(false_label)
        # "{"
        tokenizer.has_more_tokens()
        tokenizer.advance()
        # first token inside the else block
        tokenizer.has_more_tokens()
        tokenizer.advance()
        self.compile_statements()
        # "}" - step past it
        tokenizer.has_more_tokens()
        tokenizer.advance()
        self.writer.write_label(end_label)


    def compile_subroutine_call(self):
        """
        Compiles a subroutine call that starts at the token after the current one.

        Two forms are handled:
          * name(expressionList): treated as a method of the class being
            compiled; "pointer 0" is pushed as the receiver.
          * prefix.name(expressionList): when ``prefix`` is a known variable
            its value is pushed as the receiver and the call is made on the
            variable's type; otherwise ``prefix`` is used as a class name
            and no receiver is pushed.

        The argument count comes from self.num_args_called_function, which
        compile_expression_list sets, plus one when a receiver was pushed.
        """
        tokenizer = self.jack_tokenizer
        pushes_receiver = False
        tokenizer.has_more_tokens()
        tokenizer.advance()
        # "subRoutineName" or ("className" | "varName", as part of className.subRoutineName)
        called_statement = tokenizer.identifier()
        # named var_type so the builtin ``type`` is not shadowed
        var_type = self.symbol_table.type_of(called_statement)
        kind = self.symbol_table.kind_of(called_statement)
        index = self.symbol_table.index_of(called_statement)

        tokenizer.has_more_tokens()
        tokenizer.advance()
        # case of "subRoutineCall(expressionList)
        if tokenizer.token_type() == SYMBOL and tokenizer.symbol() == "(":
            pushes_receiver = True
            called_statement = self.class_name + "." + called_statement
            self.writer.write_push(POINTER, 0)
            self.compile_expression_list()
            # the curr token - ")"
        # (className | varName).subroutineName(expressionList)
        elif tokenizer.token_type() == SYMBOL and tokenizer.symbol() == ".":
            tokenizer.has_more_tokens()
            tokenizer.advance()
            # subroutineName
            # "!=" instead of "<>": the latter is a syntax error on Python 3
            if kind != NONE:
                pushes_receiver = True
                self.writer.write_push(self.find_segment(kind), index)
                called_statement = var_type + "." + tokenizer.identifier()
            else:
                called_statement = called_statement + "." + tokenizer.identifier()
            tokenizer.has_more_tokens()
            tokenizer.advance()
            # "("
            # expressionList
            self.compile_expression_list()
            # ")"
        # read after the argument list was compiled, as it sets the counter
        num_args = self.num_args_called_function
        if pushes_receiver:
            num_args += 1
        self.writer.write_call(called_statement, num_args)

    def compile_expression(self):
        """
        Compiles "term (op term)*" starting at the current token.

        Each operand is compiled before its operator is written, so the
        output is in postfix order. self.is_unary is raised whenever an
        operand starts with "-" so that compile_term does not take that
        "-" for a terminator.
        """
        tokenizer = self.jack_tokenizer
        binary_ops = ("+", "-", "*", "/", "&", "|", "<", ">", "=")

        if tokenizer.token_type() == SYMBOL and tokenizer.symbol() == "-":
            self.is_unary = True
        self.compile_term()

        while True:
            if tokenizer.token_type() != SYMBOL:
                break
            arit_symbol = tokenizer.symbol()
            if arit_symbol not in binary_ops:
                break
            # move onto the first token of the right-hand operand
            tokenizer.has_more_tokens()
            tokenizer.advance()
            if tokenizer.token_type() == SYMBOL and tokenizer.symbol() == "-":
                self.is_unary = True
            self.compile_term()
            self.writer.write_arithmetic(self.dic_arithmetic[arit_symbol])


    def compile_term(self):
        """
        Compiles a single term, starting at the current token.

        The loop inspects the current token and handles exactly one of:
        integer constant, string constant, true/false/null, this, a unary
        "~"/"-" applied to a nested term, a parenthesized expression, or an
        identifier (plain variable, array element, call on the current
        object, or "prefix.name(...)" call). Every branch except the
        "prefix.name(...)" one leaves the loop with ``break``; that branch
        falls through to the advance at the bottom and ends on the next
        iteration.
        """
        while True:
            token_type = self.jack_tokenizer.token_type()
            # a terminator / binary operator ends the term, unless the caller
            # flagged (via self.is_unary) that a leading "-" is a unary minus
            if token_type == SYMBOL and not self.is_unary and self.jack_tokenizer.symbol() in\
                    [",", ";", ")", "}","]", "+", "-", "*", "/", "&", "|", "<", ">", "="]:
                break
            if token_type == INT_CONST:
                self.writer.write_push(CONST, self.jack_tokenizer.int_val())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == STRING_CONST:
                self.compile_string()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == KEYWORD and self.jack_tokenizer.key_word() in ["true", "false", "null"]:
                # false and null are constant 0; true is "not" applied to 0
                self.writer.write_push(CONST, 0)
                if self.jack_tokenizer.key_word() == "true":
                    self.writer.write_arithmetic("not")
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            # like in return this
            if token_type == KEYWORD and self.jack_tokenizer.key_word() == "this":
                self.writer.write_push(POINTER, 0)
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() in ["~", "-"]:
                # unary operator: compile the operand first, then emit the op
                symbol = self.jack_tokenizer.symbol()
                self.is_unary = False
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_term()
                if symbol == "~":
                    self.writer.write_arithmetic("not")
                else:
                    self.writer.write_arithmetic("neg")
                break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "(":
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_expression()
                # should return from compile_expression only with ")"
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == IDENTIFIER:
                # is_add: True when a receiver object has to be pushed for a
                # "name.sub(...)" call
                is_add = True
                name = self.jack_tokenizer.identifier()
                kind = self.symbol_table.kind_of(name)
                index = self.symbol_table.index_of(name)
                # an identifier starting with an upper-case letter is not
                # treated as a variable (presumably a class name)
                if name[0].isupper():
                    is_add = False
                # look at the token after the identifier to pick the form
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() in\
                        [",", ";", ")", "}","]", "+", "-", "*", "/", "&", "|", "<", ">", "=", "&amp;", "&lt;","&gt;"]:
                    # in case of a > ...or b;
                    self.writer.write_push(self.find_segment(kind), self.symbol_table.index_of(name))
                    break
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "[":
                    # array read: index expression + base address -> pointer 1,
                    # then the element is read through "that 0"
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # should return only "]"
                    self.writer.write_push(self.find_segment(kind), self.symbol_table.index_of(name))
                    self.writer.write_arithmetic("add")
                    self.writer.write_pop(POINTER, 1)
                    self.writer.write_push(THAT, 0)
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    break
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "(":
                    # unqualified call: "pointer 0" is pushed as the receiver
                    self.writer.write_push(POINTER, 0)
                    self.compile_expression_list()
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # case of a = ... bar()
                    self.writer.write_call(self.class_name + "." + name,self.num_args_called_function + 1)
                    break
                # (className | varName).subroutineName(expressionList)
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ".":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # subroutineName
                    if is_add:
                        # call is made on the declared type of the variable
                        type = self.symbol_table.type_of(name)
                        name = type + "." + self.jack_tokenizer.identifier()
                    else:
                        name = name + "." + self.jack_tokenizer.identifier()
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # "("
                    # expressionList
                    if is_add:
                        self.writer.write_push(self.find_segment(kind), index)
                    self.compile_expression_list()
                    # ")"
                    if is_add:
                        self.writer.write_call(name, self.num_args_called_function + 1)
                    else:
                        self.writer.write_call(name, self.num_args_called_function)
                    # no break here: the advance below steps off ")" and the
                    # next iteration decides how the loop ends
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()

    def compile_expression_list(self):
        """
        Compiles a possibly empty, comma separated list of expressions.

        Advances before each expression and stops on ")". The number of
        expressions compiled is stored in self.num_args_called_function
        (nothing is returned).
        """
        tokenizer = self.jack_tokenizer
        arg_total = 0
        closed = False
        while not closed and tokenizer.has_more_tokens():
            tokenizer.advance()
            if tokenizer.token_type() == SYMBOL and tokenizer.symbol() == ")":
                # empty list, or the ")" right after the last argument
                closed = True
                continue
            arg_total += 1
            self.compile_expression()
            # compile_expression stops on "," (keep going) or ")" (done)
            closed = self.jack_tokenizer.symbol() == ")"
        self.num_args_called_function = arg_total

    def find_segment(self, kind):
        """
        Translates a symbol-table kind into the matching VM segment.

        :param kind: one of ARG, VAR, FIELD, STATIC
        :return: the segment constant, or None when kind matches none of them
        """
        kind_to_segment = (
            (ARG, ARGUMENT),
            (VAR, LCL),
            (FIELD, THIS),
            (STATIC, STATIC),
        )
        # first match wins, in the same order the kinds are listed above
        for known_kind, segment in kind_to_segment:
            if kind == known_kind:
                return segment
        return None

    def compile_string(self):
        """
        Emits the code that builds the current string constant.

        Calls String.new with the length of the string, then
        String.appendChar once per character with its ord() value.
        """
        text = self.jack_tokenizer.string_val()
        emit = self.writer
        emit.write_push(CONST, len(text))
        emit.write_call("String.new", 1)
        for char in text:
            emit.write_push(CONST, ord(char))
            emit.write_call("String.appendChar", 2)
Esempio n. 17
0
class JackCompiler:
    """Single-pass compiler from one Jack class file to VM commands.

    Constructing an instance compiles the whole file. Tokens come from a
    ``JackTokenizer``, identifiers are tracked in a ``SymbolTable`` and the
    VM commands are emitted through a ``VMWriter``.

    Tokenizer protocol as it is used in this class: ``next()`` advances and
    returns the new token's value, ``return_token_value()`` returns the value
    of the current token, ``token_type()`` its type, and ``go_back()`` is
    called to un-read one token after a look-ahead.
    """

    def __init__(self, file_path):
        """Compile the Jack class stored at *file_path*.

        :param file_path: path of the source file; the output path handed to
            the VM writer is this string with ".jack" replaced by
            "Compiled.vm".
        """
        self._tokenizer = JackTokenizer(file_path)
        self._vm_writer = VMWriter(file_path.replace(".jack", "Compiled.vm"))
        self._symbol_table = SymbolTable()
        self.class_name = ""
        # Next unused numeric label; labels are handed out two at a time.
        self.label_value = 1
        self.compile_class()

    def _segment_for(self, kind):
        """Return the segment to pass to the VM writer for a symbol *kind*.

        Fields are addressed through the ``this`` segment; every other kind
        is passed through unchanged.
        """
        if kind == "field":
            return VMWriter.THIS_SEGMENT
        return kind

    def _reserve_labels(self):
        """Return two fresh label numbers and mark them as used.

        Reserving up front keeps the labels of an enclosing if/while stable
        while nested statements allocate their own.
        """
        first = self.label_value
        self.label_value += 2
        return first, first + 1

    def compile_class(self):
        """Compile ``class Name { classVarDec* subroutineDec* }``."""
        self._tokenizer.next()  # ignore class
        self.class_name = self._tokenizer.next()
        self._tokenizer.next()  # ignore opening brackets

        while self._tokenizer.next() in ("static", "field"):
            self.compile_class_var_dec()
        # the look-ahead token was not a class variable keyword: un-read it
        self._tokenizer.go_back()

        # the token that ends this loop is not un-read
        while self._tokenizer.next() in ("constructor", "method", "function"):
            self.compile_subroutine()

    def compile_class_var_dec(self):
        """Define the variables of one ``static``/``field`` declaration.

        The current token is the ``static`` or ``field`` keyword.
        """
        var_kind = self._tokenizer.return_token_value()
        var_type = self._tokenizer.next()

        while True:
            self._tokenizer.next()
            var_name = self._tokenizer.return_token_value()
            self._symbol_table.define(var_name, var_type, var_kind)
            # the token after a name is either ',' (more names) or ';'
            if self._tokenizer.next() == ';':
                break

    def compile_subroutine(self):
        """Compile one constructor, function or method.

        The current token is the subroutine keyword. Emits the ``function``
        header (with the number of declared locals), the constructor or
        method prologue, and then the body statements.
        """
        self._symbol_table.start_subroutine()
        subroutine_type = self._tokenizer.return_token_value()

        self._tokenizer.next()  # ignore return type
        subroutine_name = self._tokenizer.next()

        # create mapping for "this" in symbol table
        # method is implicitly passed "this"
        if subroutine_type == "method":
            self._symbol_table.define("this", self.class_name, "argument")

        self._tokenizer.next()  # ignore '('
        self.compile_parameter_list()
        self._tokenizer.next()  # ignore ')'

        self._tokenizer.next()  # ignore '{'
        while self._tokenizer.next() == "var":  # var declarations
            self.compile_var_dec()
        self._tokenizer.go_back()

        self._vm_writer.write_function(self.class_name + "." + subroutine_name,
                                       self._symbol_table.var_count("var"))

        # TODO: handle constructor inside constructor as in SquareGame.new()
        if subroutine_type == "constructor":
            # allocate memory equal to number of field variables
            self._vm_writer.write_push(VMWriter.CONST_SEGMENT, self._symbol_table.var_count("field"))
            self._vm_writer.write_call("Memory.alloc", 1)
            # store pointer to new memory block as this (pointer 0)
            self._vm_writer.write_pop(VMWriter.POINTER_SEGMENT, 0)
        elif subroutine_type == "method":
            # Bind the receiver only. The explicit arguments stay in the
            # argument segment, where variable references look them up;
            # copying them into "this" would overwrite the object's fields.
            self._vm_writer.write_push(VMWriter.ARG_SEGMENT, 0)  # implicit this pointer
            self._vm_writer.write_pop(VMWriter.POINTER_SEGMENT, 0)  # write to this of current scope

        self.compile_statements()
        self._tokenizer.next()  # ignore closing '}' brackets

    def compile_parameter_list(self):
        """Define every parameter as an "argument"; stops just before ')'."""
        if self._tokenizer.next() == ')':
            self._tokenizer.go_back()
            return

        # first parameter: the look-ahead token is its type
        self._tokenizer.go_back()
        parameter_type = self._tokenizer.next()
        parameter_name = self._tokenizer.next()
        self._symbol_table.define(parameter_name, parameter_type, "argument")

        # each further parameter is introduced by a ',' consumed by the test
        while self._tokenizer.next() != ')':
            parameter_type = self._tokenizer.next()
            parameter_name = self._tokenizer.next()
            self._symbol_table.define(parameter_name, parameter_type, "argument")
        self._tokenizer.go_back()

    def compile_var_dec(self):
        """Define the locals of one ``var type name (, name)* ;`` declaration.

        The current token is the ``var`` keyword; the terminating ';' is
        consumed.
        """
        var_kind = self._tokenizer.return_token_value()
        self._tokenizer.next()
        var_type = self._tokenizer.return_token_value()

        while True:
            token = self._tokenizer.next()
            if token == ';':
                break
            if token == ',':
                # separator between names, not a variable of its own
                continue
            var_name = self._tokenizer.return_token_value()
            self._symbol_table.define(var_name, var_type, var_kind)

    def compile_statements(self):
        """Compile statements up to, but not including, the closing '}'."""
        while self._tokenizer.next() != "}":
            token_value = self._tokenizer.return_token_value()
            if token_value == "let":
                self.compile_let()
            elif token_value == "if":
                self.compile_if()
            elif token_value == "while":
                self.compile_while()
            elif token_value == "do":
                self.compile_do()
            elif token_value == "return":
                self.compile_return()

        self._tokenizer.go_back()

    def compile_let(self):
        """Compile ``let name = expr;`` and ``let name[index] = expr;``."""
        name = self._tokenizer.next()
        index, kind = self._symbol_table.index_of(name), self._symbol_table.kind_of(name)
        segment = self._segment_for(kind)
        array_access = False

        if self._tokenizer.next() == "[":
            self._vm_writer.write_push(segment, index)
            self.compile_expression()  # evaluated expression value at SP
            self._vm_writer.write_arithmetic("add")  # SP contains memory address array + base
            self._tokenizer.next()  # ignore ']'
            array_access = True
        else:
            self._tokenizer.go_back()

        self._tokenizer.next()  # ignore '='
        self.compile_expression()
        self._tokenizer.next()  # ignore ';'

        # the value is on the stack; store it in the assignment target
        if array_access:
            self._vm_writer.write_pop(VMWriter.TEMP_SEGMENT, 0)  # pop expression value to temp register
            self._vm_writer.write_pop(VMWriter.POINTER_SEGMENT, 1)  # put array index address in THAT
            self._vm_writer.write_push(VMWriter.TEMP_SEGMENT, 0)  # re insert expression value on stack
            self._vm_writer.write_pop(VMWriter.THAT_SEGMENT, 0)  # pop expression value to array index
        else:
            # same field -> this mapping that is used when reading a variable
            self._vm_writer.write_pop(segment, index)

    def compile_if(self):
        """Compile ``if (expr) {...}`` with an optional ``else {...}``."""
        false_label, end_label = self._reserve_labels()

        self._tokenizer.next()  # ignore '('
        self.compile_expression()
        # negate the condition and jump over the if-body when it is false
        self._vm_writer.write_arithmetic("~")
        self._vm_writer.write_if(false_label)
        self._tokenizer.next()  # ignore ')'

        self._tokenizer.next()  # ignore '{'
        self.compile_statements()  # if statements
        self._tokenizer.next()  # ignore '}'

        if self._tokenizer.next() == "else":
            self._tokenizer.next()  # ignore '{'
            self._vm_writer.write_goto(end_label)  # end of if-body: skip the else-body
            self._vm_writer.write_label(false_label)
            self.compile_statements()  # else statements
            self._vm_writer.write_label(end_label)
            self._tokenizer.next()  # ignore '}'
        else:
            self._tokenizer.go_back()
            self._vm_writer.write_label(false_label)

    def compile_while(self):
        """Compile ``while (expr) {...}``."""
        loop_label, exit_label = self._reserve_labels()

        self._tokenizer.next()  # ignore '('
        self._vm_writer.write_label(loop_label)
        self.compile_expression()
        # negate the condition and leave the loop when it is false
        self._vm_writer.write_arithmetic("~")
        self._vm_writer.write_if(exit_label)
        self._tokenizer.next()  # ignore ')'

        self._tokenizer.next()  # ignore '{'
        self.compile_statements()
        self._vm_writer.write_goto(loop_label)
        self._vm_writer.write_label(exit_label)
        self._tokenizer.next()  # ignore '}'

    def compile_do(self):
        """Compile ``do subroutineCall;`` and discard the returned value."""
        self.compile_subroutine_call()
        self._vm_writer.write_pop(VMWriter.TEMP_SEGMENT, 0)  # discard popped value
        self._tokenizer.next()  # ignore ';'

    def compile_return(self):
        """Compile ``return;`` (pushes constant 0) or ``return expr;``."""
        if self._tokenizer.next() == ";":
            self._vm_writer.write_push(VMWriter.CONST_SEGMENT, 0)  # void functions should return 0
        else:
            self._tokenizer.go_back()
            self.compile_expression()
            self._tokenizer.next()  # ignore ';'

        self._vm_writer.write_return()

    def compile_subroutine_call(self, sub_name=None):
        """Compile ``name(args)`` or ``prefix.name(args)``.

        :param sub_name: first identifier of the call when the caller has
            already consumed it (compile_term does); otherwise it is read
            from the tokenizer.
        """
        if not sub_name:
            sub_name = self._tokenizer.next()

        # check if class function or method call
        args = 0
        if self._tokenizer.next() == '.':
            callee_name = sub_name
            sub_name = self._tokenizer.next()

            kind = self._symbol_table.kind_of(callee_name)
            if kind is None:  # function call of the form Math.multiply()
                name = callee_name + "." + sub_name
            else:  # method call of the form object.draw(this, ...)
                name = self._symbol_table.type_of(callee_name) + "." + sub_name
                self._vm_writer.write_push(self._segment_for(kind),
                                           self._symbol_table.index_of(callee_name))
                args = 1
        else:  # implicit method call, push this as first argument
            self._tokenizer.go_back()
            name = self.class_name + "." + sub_name
            self._vm_writer.write_push(VMWriter.POINTER_SEGMENT, 0)
            args = 1

        self._tokenizer.next()  # ignore '('
        args += self.compile_expression_list()
        self._tokenizer.next()  # ignore ')'

        self._vm_writer.write_call(name, args)

    def compile_expression_list(self):
        """Compile a comma separated argument list; stops just before ')'.

        :return: the number of expressions compiled.
        """
        args_count = 0

        if self._tokenizer.next() == ')':
            self._tokenizer.go_back()
            return args_count

        self._tokenizer.go_back()
        self.compile_expression()
        args_count += 1

        # the ',' before each further expression is consumed by the test
        while self._tokenizer.next() != ')':
            self.compile_expression()
            args_count += 1
        self._tokenizer.go_back()
        return args_count

    def compile_expression(self):
        """Compile ``term (op term)*``; stops before ')', ']', ',' or ';'."""
        self.compile_term()

        while True:
            op = self._tokenizer.next()  # token is an op
            if op in [")", "]", ",", ";"]:  # expression termination characters
                self._tokenizer.go_back()
                break

            self.compile_term()
            # "*" and "/" are calls to OS functions; the name handed to the
            # writer is the bare function name
            if op == "/":
                self._vm_writer.write_call("Math.divide", 2)
            elif op == "*":
                self._vm_writer.write_call("Math.multiply", 2)
            else:
                self._vm_writer.write_arithmetic(op)

    def compile_term(self):
        """Compile one term, starting at the token after the current one."""
        self._tokenizer.next()
        token_type = self._tokenizer.token_type()
        token_value = self._tokenizer.return_token_value()
        if token_type == JackTokenizer.INT_CONST_TOKEN:
            self.compile_integer(token_value)
        elif token_type == JackTokenizer.STRING_CONST_TOKEN:
            self.compile_string(token_value)
        elif token_type == JackTokenizer.KEYWORD_TOKEN:  # only true, false, null and this
            self.compile_keyword(token_value)
        elif token_type == JackTokenizer.IDENTIFIER_TOKEN:  # subroutine or variables or array accesses
            # one token of look-ahead tells a call from a variable reference
            if self._tokenizer.next() in ["(", "."]:
                self._tokenizer.go_back()
                self.compile_subroutine_call(token_value)
            else:
                self._tokenizer.go_back()
                self.compile_var_name(token_value)
        elif token_value == "(":
            self.compile_expression()
            self._tokenizer.next()  # ignore ')'
        elif token_type == JackTokenizer.SYMBOL_TOKEN:  # unary ops
            # any other symbol is skipped without emitting anything
            if token_value in ["-", "~"]:
                self.compile_term()
                # NOTE(review): unary "-" is handed to the writer with the
                # same symbol as binary minus - confirm that
                # VMWriter.write_arithmetic can emit "neg" for it.
                self._vm_writer.write_arithmetic(token_value)
        else:
            exit("Invalid term")

    def compile_string(self, string):
        """Emit the code that builds the string constant *string*."""
        length = len(string)
        self._vm_writer.write_push(VMWriter.CONST_SEGMENT, length)
        self._vm_writer.write_call("String.new", 1)  # returns a new string pointer at SP

        for char in string:
            self._vm_writer.write_push(VMWriter.CONST_SEGMENT, ord(char))
            # a call, not a function declaration: the two arguments are the
            # string pointer already on the stack and the character code
            self._vm_writer.write_call("String.appendChar", 2)

    def compile_integer(self, value):
        """Push the integer constant *value*."""
        self._vm_writer.write_push(VMWriter.CONST_SEGMENT, value)

    def compile_keyword(self, keyword):
        """Push the value of the keyword constant true, false, null or this."""
        if keyword == "true":
            self._vm_writer.write_push(VMWriter.CONST_SEGMENT, 0)
            # NOTE(review): "!" here but "~" for the negation in if/while -
            # confirm both spellings are understood by VMWriter.
            self._vm_writer.write_arithmetic("!")
        elif keyword == "this":
            self._vm_writer.write_push(VMWriter.POINTER_SEGMENT, 0)
        else:  # false and null
            self._vm_writer.write_push(VMWriter.CONST_SEGMENT, 0)

    def compile_var_name(self, name):
        """Push variable *name*, or the array element ``name[expr]``."""
        index, kind = self._symbol_table.index_of(name), self._symbol_table.kind_of(name)

        if kind:  # index can be 0, so checking kind if symbol exists
            self._vm_writer.write_push(self._segment_for(kind), index)
        else:
            exit("access to undefined variable")

        # evaluate array access
        if self._tokenizer.next() == "[":
            self.compile_expression()  # evaluated expression value at SP
            self._tokenizer.next()  # ignore ']'
            self._vm_writer.write_arithmetic("+")
            self._vm_writer.write_pop(VMWriter.POINTER_SEGMENT, 1)  # pop array + base to THAT
            self._vm_writer.write_push(VMWriter.THAT_SEGMENT, 0)  # access [array + base] through THAT
        else:
            self._tokenizer.go_back()
Esempio n. 18
0
class CompilationEngine:
    """CompilationEngine: Effects the actual compilation output. Gets its input from a JackTokenizer and emits its parsed structure into an output file/stream."""
    def __init__(self, tokens_with_tokenType, out_vm_file):
        """Prepare the engine; nothing is compiled until compile() is called.

        tokens_with_tokenType: the (token, token_type) pairs to consume.
        out_vm_file: output path handed to VMWriter; its ``stem`` becomes the
            class name used to prefix subroutine names.
        """
        # running counters for labels of while and if/else statements
        self.while_label_index = 0
        self.if_else_label_index = 0

        self.tokens_with_tokenType = tokens_with_tokenType
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(out_vm_file)
        self.class_name = out_vm_file.stem

        # lookup tables used while emitting VM commands
        self.construct_op_dict()
        self.construct_segment_dict()

    def construct_op_dict(self):
        self.op_dict = {
            '+': 'add',
            '-': 'sub',
            '&': 'and',
            '|': 'or',
            '<': 'lt',
            '>': 'gt',
            '=': 'eq',
        }

    def construct_segment_dict(self):
        """Translate the kind of variable to related memory segment name"""
        self.segment_dict = {
            'STATIC': 'static',
            'FIELD': 'this',
            'ARG': 'argument',
            'VAR': 'local',
        }

    def compile(self):
        compiled_etree = self.compile_tokens()
        # Uncomment following line if you want to see the output of compiled element tree
        # print(etree.tounicode(compiled_etree, pretty_print=True))
        self.vm_writer.close()

    def compile_tokens(self):
        self.compiled_output_root = etree.Element('class')
        self.compile_class()
        compiled_etree = etree.ElementTree(self.compiled_output_root)
        return compiled_etree

    def compile_new_token_ensure_token_type(self, correct_token_type, parent):
        token, token_type = self.compile_new_token(parent)
        assert token_type == correct_token_type, '{} with token_type {} not expected'.format(
            token, token_type)

    def compile_new_token_ensure_token(self, correct_token, parent):
        token, token_type = self.compile_new_token(parent)
        assert token == correct_token, '{} with token_type {} not expected'.format(
            token, token_type)

    def compile_new_token(self, parent):
        token, token_type = self.next_token_and_type()
        self.add_sub_element(parent, token_type, token)
        return token, token_type

    def add_sub_element(self, parent, element_tag, element_text):
        """Append an <element_tag> child to parent holding element_text.

        The text is stored with one space of padding on each side.
        """
        child = etree.SubElement(parent, element_tag)
        child.text = ''.join((' ', element_text, ' '))

    def next_token_and_type(self):
        return self.tokens_with_tokenType.pop(0)

    def show_next_token(self):
        token, token_type = self.tokens_with_tokenType[0]
        return token

    def show_next_token_and_type(self):
        return self.tokens_with_tokenType[0]

    def compile_class(self):
        """
        Compiles a complete class.
        class: 'class' className '{' classVarDec* subroutineDec* '}'
        """
        self.compile_new_token_ensure_token('class', self.compiled_output_root)
        self.compile_new_token_ensure_token_type('identifier',
                                                 self.compiled_output_root)
        self.compile_new_token_ensure_token('{', self.compiled_output_root)
        self.compile_classVarDec()
        self.compile_subroutineDec()
        self.compile_new_token_ensure_token('}', self.compiled_output_root)

    def compile_classVarDec(self):
        """
        Compiles a static declaration or a field declaration.
        classVarDec: ('static' | 'field') type varName (',' varName)* ';'
        """
        token = self.show_next_token()
        if token in {'static', 'field'}:
            compiled_output_class_var_dec = etree.SubElement(
                self.compiled_output_root, 'classVarDec')
            symbol_kind = token.upper()
            # Add static or field
            self.compile_new_token(compiled_output_class_var_dec)
            symbol_type = self.compile_type(compiled_output_class_var_dec)
            self.compile_one_or_more_varName(compiled_output_class_var_dec,
                                             symbol_type, symbol_kind)
            self.compile_new_token_ensure_token(';',
                                                compiled_output_class_var_dec)
            # Recursive call
            self.compile_classVarDec()

    def compile_one_or_more_varName(self, parent, symbol_type, symbol_kind):
        self.add_new_symbol(symbol_type, symbol_kind)
        self.compile_new_token_ensure_token_type('identifier', parent)
        self.compile_more_varName_if_exist(parent, symbol_type, symbol_kind)

    def add_new_symbol(self, symbol_type, symbol_kind):
        """Next token is symbol_name, add this symbol_name and its symbol_type and symbol_kind to self.symbol_table"""
        symbol_name = self.show_next_token()
        self.symbol_table.define(symbol_name, symbol_type, symbol_kind)

    def compile_more_varName_if_exist(self, parent, symbol_type, symbol_kind):
        """If there is more varName, compiles them"""
        token = self.show_next_token()
        if token == ',':  # More VarName need to add
            self.compile_new_token(parent)  # Add ','
            self.add_new_symbol(symbol_type, symbol_kind)
            self.compile_new_token_ensure_token_type('identifier', parent)
            # Recursive call
            self.compile_more_varName_if_exist(parent, symbol_type,
                                               symbol_kind)

    def compile_type(self, parent):
        """
        Compiles type for var and add token element to parent.
        type: 'int' | 'char' | 'boolean' | className
        """
        token, token_type = self.compile_new_token(parent)
        assert token in {'int', 'char', 'boolean'
                         } or token_type == 'identifier'
        return token

    def compile_void_or_type(self, parent):
        """
        Compiles type or 'void' for var and add token element to parent.
        """
        token, token_type = self.compile_new_token(parent)
        assert token in {'void', 'int', 'char', 'boolean'
                         } or token_type == 'identifier'

    def compile_subroutineDec(self):
        """
        Compiles a complete method, function, or constructor.
        subroutineDec: ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody
        """
        token = self.show_next_token()
        if token in {'constructor', 'function', 'method'}:
            self.symbol_table.start_subroutine(
            )  # Reset the subroutine's symbol table
            function_kind = token
            compiled_output_subroutineDec = etree.SubElement(
                self.compiled_output_root, 'subroutineDec')
            # Add token in {'constructor', 'function', 'method'} to compiled_output_subroutineDec
            self.compile_new_token(compiled_output_subroutineDec)
            self.compile_void_or_type(compiled_output_subroutineDec)
            # subroutineName
            function_name = self.class_name + '.' + self.show_next_token()
            self.compile_new_token_ensure_token_type(
                'identifier', compiled_output_subroutineDec)
            self.compile_new_token_ensure_token('(',
                                                compiled_output_subroutineDec)
            # parameterList
            if function_kind == 'method':
                # this is a dummy symbol added to the symbol_table's ARG, for the side effect that method's number of arguments will add 1. A method with k arguments operates on k+1 arguments actually, and the first argument (argument number 0) always refers to the this object
                self.symbol_table.define('this', 'int', 'ARG')
            self.compile_parameterList(compiled_output_subroutineDec)
            self.compile_new_token_ensure_token(')',
                                                compiled_output_subroutineDec)
            # subroutineBody
            self.compile_subroutineBody(compiled_output_subroutineDec,
                                        function_name, function_kind)

            # Recursive call
            self.compile_subroutineDec()

    def compile_parameterList(self, parent):
        """
        parameterList: ((type varName) (',' type varName)*)?

        Appends a 'parameterList' element to parent. When the list is not
        empty, the first parameter is handled here (its type token is recorded
        as an 'ARG' symbol) and the rest is delegated to
        compile_more_parameter.
        """
        param_list_node = etree.SubElement(parent, 'parameterList')
        upcoming, upcoming_type = self.show_next_token_and_type()
        if upcoming == ')':
            # Nothing to declare; the text only changes how the empty
            # element is printed.
            param_list_node.text = '\n\t'
            return
        # The type is either a primitive keyword or a class name (identifier)
        is_primitive = upcoming in {'int', 'char', 'boolean'}
        assert is_primitive or upcoming_type == 'identifier'
        self.compile_new_token(param_list_node)  # consume the type token
        self.add_new_symbol(upcoming, 'ARG')
        # varName
        self.compile_new_token_ensure_token_type('identifier',
                                                 param_list_node)
        # (',' type varName)*
        self.compile_more_parameter(param_list_node)

    def compile_subroutineBody(self, parent, function_name, function_kind):
        """
        subroutineBody: '{' varDec* statements '}'

        The VM "function" header is written only after the varDecs have been
        compiled, because it needs the number of 'VAR' symbols. It is followed
        by the constructor or method prologue and then the statements.
        """
        body_node = etree.SubElement(parent, 'subroutineBody')
        self.compile_new_token_ensure_token('{', body_node)
        self.compile_varDec(body_node)
        writer = self.vm_writer
        writer.write_function(
            function_name, self.symbol_table.count_symbol_by_kind('VAR'))
        if function_kind == 'constructor':
            # this = Memory.alloc(number of fields)
            writer.write_push(
                'constant', self.symbol_table.count_symbol_by_kind('FIELD'))
            writer.write_call('Memory.alloc', 1)
            writer.write_pop('pointer', 0)
        elif function_kind == 'method':
            # The object address arrives as argument 0; anchor the virtual
            # this segment on it through pointer 0.
            writer.write_push('argument', 0)
            writer.write_pop('pointer', 0)
        statements_node = etree.SubElement(body_node, 'statements')
        self.compile_statements(statements_node)
        self.compile_new_token_ensure_token('}', body_node)

    def compile_more_parameter(self, parent):
        """
        Compile the (',' type varName)* tail of a parameter list.

        Each additional parameter is recorded as an 'ARG' symbol with the
        type returned by compile_type.
        """
        while self.show_next_token() == ',':
            self.compile_new_token(parent)  # consume ','
            param_type = self.compile_type(parent)
            self.add_new_symbol(param_type, 'ARG')
            self.compile_new_token_ensure_token_type('identifier', parent)

    def compile_varDec(self, parent):
        """
        varDec: 'var' type varName (',' varName)* ';'

        Consumes every consecutive declaration, adding one 'varDec' element
        to parent per declaration.
        """
        while True:
            keyword = self.show_next_token()
            if keyword != 'var':
                return
            var_dec_node = etree.SubElement(parent, 'varDec')
            kind = keyword.upper()
            self.compile_new_token(var_dec_node)  # consume 'var'
            declared_type = self.compile_type(var_dec_node)
            self.add_new_symbol(declared_type, kind)
            self.compile_new_token_ensure_token_type('identifier',
                                                     var_dec_node)
            self.compile_more_varName_if_exist(var_dec_node, declared_type,
                                               kind)
            self.compile_new_token_ensure_token(';', var_dec_node)

    def compile_statements(self, parent):
        """
        statements: statement*
        statement: letStatement | ifStatement | whileStatement | doStatement | returnStatement

        Compiles statements for as long as the next token is a statement
        keyword. This is a loop rather than one recursive call per statement,
        so a long statement list cannot exhaust Python's recursion limit.

        :param parent: XML element the statement elements are appended to
        """
        handlers = {
            'let': self.compile_statement_let,
            'if': self.compile_statement_if,
            'while': self.compile_statement_while,
            'do': self.compile_statement_do,
            'return': self.compile_statement_return,
        }
        while True:
            handler = handlers.get(self.show_next_token())
            if handler is None:
                # Next token does not start a statement: the list is over
                return
            handler(parent)

    def compile_statement_let(self, parent):
        """
        letStatement: 'let' varName ('[' expression ']')? '=' expression ';'
        vm: evaluate the right-hand expression, then pop it into varName or
        into the addressed array entry
        """
        let_node = etree.SubElement(parent, 'letStatement')
        self.compile_new_token_ensure_token('let', let_node)
        target_name = self.show_next_token()  # varName
        self.compile_new_token_ensure_token_type('identifier', let_node)
        is_array_entry = self.show_next_token() == '['
        if is_array_entry:
            # code:
            #     arr[expression1] = expression2
            # vm:
            #     push arr
            #     push expression1
            #     add
            #     push expression2
            #     pop temp 0
            #     pop pointer 1
            #     push temp 0
            #     pop that 0
            # pointer 1 is set only after expression2 has been evaluated,
            # because expression2 may itself read arrays (a[i] = b[j]) and
            # would otherwise overwrite pointer 1. The value of expression2
            # is parked in temp 0 meanwhile.
            self.write_push_variable(target_name)
            self.compile_new_token(let_node)  # consume '['
            self.compile_expression(let_node)
            self.vm_writer.write_arithmetic('add')
            self.compile_new_token_ensure_token(']', let_node)
        self.compile_new_token_ensure_token('=', let_node)
        self.compile_expression(let_node)
        if not is_array_entry:
            # plain variable target
            self.write_pop_variable(target_name)
        else:
            # Align that with the target address, then "pop that 0"
            writer = self.vm_writer
            writer.write_pop('temp', 0)
            writer.write_pop('pointer', 1)
            writer.write_push('temp', 0)
            writer.write_pop('that', 0)
        self.compile_new_token_ensure_token(';', let_node)

    def compile_statement_if(self, parent):
        """
        ifStatement: 'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?
        code:
            if (cond)
                s1
            else
                s2
        vm:
            VM code for computing ~(cond)
            if-goto L1
            VM code for executing s1
            goto L2
            label L1
            VM code for executing s2
            label L2

        L1 and L2 are emitted even when there is no else clause.
        """
        if_node = etree.SubElement(parent, 'ifStatement')
        self.compile_new_token_ensure_token('if', if_node)
        # Reserve this statement's label pair before compiling nested
        # statements, which take their own indices.
        self.if_else_label_index += 1
        owner = self.class_name.upper()
        index = self.if_else_label_index
        else_label = 'ELSE_START_{}_{}'.format(owner, index)
        end_label = 'IF_ELSE_END_{}_{}'.format(owner, index)
        writer = self.vm_writer
        self.compile_new_token_ensure_token('(', if_node)
        self.compile_expression(if_node)
        writer.write_arithmetic('not')
        writer.write_if_goto(else_label)
        self.compile_new_token_ensure_token(')', if_node)
        self.compile_new_token_ensure_token('{', if_node)
        self.compile_statements(etree.SubElement(if_node, 'statements'))
        writer.write_goto(end_label)
        self.compile_new_token_ensure_token('}', if_node)
        writer.write_label(else_label)
        if self.show_next_token() == 'else':
            self.compile_new_token_ensure_token('else', if_node)
            self.compile_new_token_ensure_token('{', if_node)
            self.compile_statements(etree.SubElement(if_node, 'statements'))
            self.compile_new_token_ensure_token('}', if_node)
        writer.write_label(end_label)

    def compile_statement_while(self, parent):
        """
        whileStatement: 'while' '(' expression ')' '{' statements '}'
        code:
            while (cond)
                s1
        vm:
            label L1
            VM code for computing ~(cond)
            if-goto L2
            VM code for executing s1
            goto L1
            label L2
        """
        while_node = etree.SubElement(parent, 'whileStatement')
        self.compile_new_token_ensure_token('while', while_node)
        # Reserve this loop's label pair before compiling the body, whose
        # nested loops take their own indices.
        self.while_label_index += 1
        owner = self.class_name.upper()
        index = self.while_label_index
        start_label = 'WHILE_START_{}_{}'.format(owner, index)
        end_label = 'WHILE_END_{}_{}'.format(owner, index)
        writer = self.vm_writer
        writer.write_label(start_label)
        self.compile_new_token_ensure_token('(', while_node)
        self.compile_expression(while_node)
        # Leave the loop as soon as the condition is false
        writer.write_arithmetic('not')
        writer.write_if_goto(end_label)
        self.compile_new_token_ensure_token(')', while_node)
        self.compile_new_token_ensure_token('{', while_node)
        self.compile_statements(etree.SubElement(while_node, 'statements'))
        writer.write_goto(start_label)
        writer.write_label(end_label)
        self.compile_new_token_ensure_token('}', while_node)

    def compile_statement_do(self, parent):
        """
        doStatement: 'do' subroutineCall ';'

        A do statement ignores the callee's result, so the value the call
        leaves on the stack is popped into temp 0 and dropped.
        """
        do_node = etree.SubElement(parent, 'doStatement')
        self.compile_new_token_ensure_token('do', do_node)
        self.compile_subroutineCall(do_node)
        # Discard the returned value
        self.vm_writer.write_pop('temp', 0)
        self.compile_new_token_ensure_token(';', do_node)

    def compile_subroutineCall(self, parent):
        """
        subroutineCall: subroutineName '(' expressionList ')' | (className | varName) '.' subroutineName '(' expressionList ')'

        Three forms are told apart:
          * name(...)       - method of the current class: push pointer 0 and
                              call <class_name>.name with one extra argument
          * varName.sub()   - name has a type in the symbol table: push the
                              variable and call <type>.sub with one extra
                              argument
          * ClassName.sub() - name is not in the symbol table: call
                              ClassName.sub with no extra argument
        """
        callee = self.show_next_token()
        # subroutineName, className or varName
        self.compile_new_token_ensure_token_type('identifier', parent)
        if self.show_next_token() != '.':
            # Implicit receiver: the current object
            self.vm_writer.write_push('pointer', 0)
            function_name = self.class_name + '.' + callee
            pushes_receiver = True
        else:
            self.compile_new_token_ensure_token('.', parent)
            receiver_type = self.symbol_table.get_symbol_type(callee)
            pushes_receiver = bool(receiver_type)
            if pushes_receiver:
                # callee is a variable: its value is the object address
                self.write_push_variable(callee)
                owner = receiver_type
            else:
                # Unknown to the symbol table, so it must be a class name
                owner = callee
            function_name = owner + '.' + self.show_next_token()
            # subroutineName
            self.compile_new_token_ensure_token_type('identifier', parent)

        self.compile_new_token_ensure_token('(', parent)
        self.compile_expressionList(parent, function_name, pushes_receiver)
        self.compile_new_token_ensure_token(')', parent)

    def compile_statement_return(self, parent):
        """
        returnStatement: 'return' expression? ';'

        When no expression follows, the constant 0 is pushed so that a value
        is always on the stack at "return".
        """
        return_node = etree.SubElement(parent, 'returnStatement')
        self.compile_new_token_ensure_token('return', return_node)
        if self.show_next_token() == ';':
            # bare "return;": push the constant 0
            self.vm_writer.write_push('constant', 0)
        else:
            self.compile_expression(return_node)
        self.vm_writer.write_return()
        self.compile_new_token_ensure_token(';', return_node)

    def compile_expression(self, parent):
        """
        expression: term (op term)*

        Adds an 'expression' element to parent, compiles the leading term
        into it, then any (op term) pairs that follow.
        """
        expression_node = etree.SubElement(parent, 'expression')
        for compile_part in (self.compile_term,
                             self.compile_zero_or_more_op_and_term):
            compile_part(expression_node)

    def compile_term(self, parent):
        """
        term: integerConstant | stringConstant | keywordConstant | varName | varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term

        Adds a 'term' element to parent and writes the VM code that leaves
        the term's value on top of the stack.

        :param parent: XML element the 'term' element is appended to
        :raises ValueError: if the next token cannot start a term
        """
        term_node = etree.SubElement(parent, 'term')
        next_token, token_type = self.show_next_token_and_type()
        if token_type == 'integerConstant' or next_token in {
                'true', 'false', 'null', 'this'
        }:  # integerConstant or keywordConstant
            if token_type == 'integerConstant':
                self.vm_writer.write_push('constant', next_token)
            elif next_token == 'true':
                # true is -1 (all 16 bits set): push 1, then negate
                self.vm_writer.write_push('constant', 1)
                self.vm_writer.write_arithmetic('neg')
            elif next_token == 'false' or next_token == 'null':
                self.vm_writer.write_push('constant', 0)
            else:  # next_token == 'this'
                # the current object is always the content of pointer 0
                self.vm_writer.write_push('pointer', 0)
            self.compile_new_token(term_node)
        elif token_type == 'stringConstant':
            token, token_type = self.next_token_and_type()
            # strip the surrounding double quotes
            string = token[1:-1]
            # Build the string through the OS: String.new(length), then one
            # String.appendChar(char) call per character
            self.vm_writer.write_push('constant', len(string))
            self.vm_writer.write_call('String.new', 1)
            for char in string:
                self.vm_writer.write_push('constant', ord(char))
                self.vm_writer.write_call('String.appendChar', 2)
            self.add_sub_element(term_node, token_type, string)
        elif token_type == 'identifier':
            # Look one token past the identifier to tell the forms apart
            token_after, token_type = self.tokens_with_tokenType[1]
            if token_after == '[':  # array entry
                # code:
                #     a[i]
                # vm:
                #     push a
                #     push i
                #     add
                #     pop pointer 1
                #     push that 0
                self.write_push_variable(next_token)
                self.compile_new_token_ensure_token_type(
                    'identifier', term_node)
                self.compile_new_token_ensure_token('[', term_node)
                self.compile_expression(term_node)
                self.vm_writer.write_arithmetic('add')
                self.vm_writer.write_pop('pointer', 1)
                # read the entry through the that segment
                self.vm_writer.write_push('that', 0)
                self.compile_new_token_ensure_token(']', term_node)
            elif token_after == '(' or token_after == '.':
                self.compile_subroutineCall(term_node)
            else:  # a single varName
                self.write_push_variable(next_token)
                self.compile_new_token_ensure_token_type(
                    'identifier', term_node)
        elif next_token == '(':
            self.compile_new_token(term_node)
            self.compile_expression(term_node)
            self.compile_new_token_ensure_token(')', term_node)
        elif next_token in {'-', '~'}:  # unaryOp
            self.compile_new_token(term_node)
            self.compile_term(term_node)
            # the operator applies to the operand that was just pushed
            if next_token == '-':
                self.vm_writer.write_arithmetic('neg')
            else:
                self.vm_writer.write_arithmetic('not')
        else:
            # Raising a plain string is itself a TypeError in Python 3, which
            # hid the real problem; raise a proper exception instead.
            raise ValueError(
                'Not a valid expression: unexpected token {!r}'.format(
                    next_token))

    def compile_zero_or_more_op_and_term(self, parent):
        """
        Compile the (op term)* tail of an expression.

        op: '+' | '-' | '*' | '/' | '&' | '|' | '<' | '>' | '='

        The VM command for each operator is written right after its
        right-hand term. '*' and '/' become calls to Math.multiply and
        Math.divide; every other operator is looked up in self.op_dict.
        """
        binary_ops = {'+', '-', '*', '/', '&', '|', '<', '>', '='}
        while True:
            op = self.show_next_token()
            if op not in binary_ops:
                return
            self.compile_new_token(parent)  # consume the operator
            self.compile_term(parent)
            if op == '*':
                self.vm_writer.write_call('Math.multiply', 2)
            elif op == '/':
                self.vm_writer.write_call('Math.divide', 2)
            else:
                self.vm_writer.write_arithmetic(self.op_dict[op])

    def compile_expressionList(self, parent, function_name,
                               args_num_should_add_1):
        """
        expressionList: (expression (',' expression)* )?

        Compiles the call arguments and then writes "call function_name n".

        The arguments are counted in a local variable. Counting in the shared
        self.args_num attribute is not safe: an argument that itself contains
        a subroutine call re-enters this method, which reset the attribute and
        left the outer call with a wrong argument count.

        :param parent: XML element the 'expressionList' element is appended to
        :param function_name: full VM name of the function to call
        :param args_num_should_add_1: True when the object address was already
            pushed as an implicit first argument and must be counted
        """
        list_node = etree.SubElement(parent, 'expressionList')
        args_num = 1 if args_num_should_add_1 else 0
        if self.show_next_token() == ')':
            # No expression; the text only changes how the empty element is
            # printed
            list_node.text = '\n\t'
        else:
            self.compile_expression(list_node)
            args_num += 1
            while self.show_next_token() == ',':
                self.compile_new_token_ensure_token(',', list_node)
                self.compile_expression(list_node)
                args_num += 1
        # Keep the attribute in step for any code that still reads it
        self.args_num = args_num
        self.vm_writer.write_call(function_name, args_num)

    def compile_comma_and_expression(self, parent):
        """
        Compile the (',' expression)* tail of an expression list.

        self.args_num is increased by one per expression. The new count is
        written back after each expression has been compiled, because an
        expression containing a subroutine call resets self.args_num while
        its own argument list is compiled.

        :param parent: XML element the ',' tokens and expressions are added to
        """
        while self.show_next_token() == ',':
            self.compile_new_token_ensure_token(',', parent)
            counted = self.args_num + 1
            self.compile_expression(parent)
            # Restore the count in case a nested call overwrote it
            self.args_num = counted

    def write_push_variable(self, symbol_name):
        """Write a push of the named variable: its kind selects the segment via segment_dict, its index the offset."""
        table = self.symbol_table
        offset = table.get_symbol_index(symbol_name)
        segment = self.segment_dict[table.get_symbol_kind(symbol_name)]
        self.vm_writer.write_push(segment, offset)

    def write_pop_variable(self, symbol_name):
        """Write a pop into the named variable: its kind selects the segment via segment_dict, its index the offset."""
        table = self.symbol_table
        offset = table.get_symbol_index(symbol_name)
        segment = self.segment_dict[table.get_symbol_kind(symbol_name)]
        self.vm_writer.write_pop(segment, offset)
Esempio n. 19
0
class CompilationEngine:
    """
    Recursive-descent compiler for Jack expressions and statements.

    Tokens are read from a Tokenizer and VM commands are written through a
    VMWriter. An XML element tree is extended alongside the parse (the
    ``caller`` argument of every compile_* method).

    Every compile_* method expects the tokenizer to be positioned on the first
    token of its construct and leaves it on the first token after it.
    """

    def __init__(self, source):
        """
        Compile the class found in source.

        :param source: passed unchanged to both Tokenizer and VMWriter
        """
        self.if_counter = 0
        self.while_counter = 0
        self.tokenizer = Tokenizer(source)
        # Position the tokenizer on the first token; the result of
        # has_more_tokens() is not used here.
        self.tokenizer.has_more_tokens()
        self.tokenizer.advance()
        self.symbols = SymbolTable()
        self.writer = VMWriter(source)
        self.arithmetic_op = {}
        self.init_op()
        self.root = Element(CLASS)
        self.class_name = ""
        self.compile_class(self.root)
        self.writer.close()

    def init_op(self):
        """Fill self.arithmetic_op, mapping each binary operator to the text handed to write_arithmetic."""
        self.arithmetic_op = {
            '+': "add",
            '-': "sub",
            '*': "call Math.multiply 2",
            '/': "call Math.divide 2",
            '&': "and",
            '|': "or",
            '<': "lt",
            '>': "gt",
            '=': "eq",
        }

    def next(self):
        """
        Proceed to the next token, if there is one.
        :return:
        """
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()

    def _current_symbol_is(self, symbols):
        """Return True when the current token is a symbol contained in symbols."""
        tokenizer = self.tokenizer
        return (tokenizer.token_type() is JTok.SYMBOL
                and tokenizer.symbol() in symbols)

    def _push_variable(self, name):
        """Push the variable called name; print "unexpected" if the symbol table does not know it."""
        kind = self.symbols.kind_of(name)
        index = self.symbols.index_of(name)
        if kind is not None:
            self.writer.write_push(kind.get_segment(), index)
        else:
            print("unexpected")

    def compile_expression(self, caller):
        """
        Compiles an expression: term (op term)*

        Operators are applied strictly left to right: each operator is
        written as soon as its right-hand term has been compiled, so
        ``a - b - c`` is evaluated as ``(a - b) - c``. Deferring all operators
        to the end would instead evaluate it as ``a - (b - c)``.

        :param caller: XML element the TERM elements are appended to
        :return:
        """
        self.compile_term(SubElement(caller, TERM))
        while self._current_symbol_is(OPERATORS):
            operator = self.tokenizer.symbol()
            self.next()
            self.compile_term(SubElement(caller, TERM))
            self.writer.write_arithmetic(self.arithmetic_op[operator])

    def compile_expressionList(self, caller):
        """
        Compiles a comma-separated, possibly empty, list of expressions.

        :param caller: XML element the EXPRESSION elements are appended to
        :return: the number of expressions compiled
        """
        num_of_args = 0
        # the expression list is empty
        if self._current_symbol_is({")"}):
            caller.text = " "
            return num_of_args

        num_of_args += 1
        self.compile_expression(SubElement(caller, EXPRESSION))
        while self._current_symbol_is({","}):
            num_of_args += 1
            self.next()  # skip ','
            self.compile_expression(SubElement(caller, EXPRESSION))
        return num_of_args

    def compile_subroutineCall(self, caller, first_token):
        """
        Compiles a subroutine call whose first identifier was already read.

        On entry the current token is the '.' or '(' following first_token.

        :param caller: XML element the expression list is appended to
        :param first_token: the subroutine, class or variable name that opens
            the call
        """
        func_name = first_token

        is_method = 0
        if self.tokenizer.symbol() == '.':
            self.next()  # now on the subroutine name
            if self.symbols.kind_of(func_name):
                # first_token is a variable: push it as the receiver and
                # call the subroutine of the variable's type
                segment = self.symbols.kind_of(func_name)
                segment = Kind.get_segment(segment)
                index = self.symbols.index_of(func_name)
                self.writer.write_push(segment, index)
                func_name = self.symbols.type_of(func_name)
                is_method = 1

            func_name = func_name + "." + self.tokenizer.identifier()
            self.next()  # now on '('
        else:
            # bare name(...): a method of the current object
            func_name = self.class_name + "." + func_name
            self.writer.write_push(POINTER, 0)
            is_method = 1

        self.next()  # skip '('
        num_of_args = self.compile_expressionList(
            SubElement(caller, EXPRESSION_LIST)) + is_method

        self.writer.write_call(func_name, num_of_args)

        self.next()  # skip ')'

    def compile_term(self, caller):
        """
        Compiles a term and writes the VM code that pushes its value.

        :param caller: XML element nested expressions/terms are appended to
        """
        token_type = self.tokenizer.token_type()
        if token_type is JTok.INT_CONST:
            self.writer.write_push(CONSTANT, self.tokenizer.intVal())
            self.next()

        elif token_type is JTok.STRING_CONST:
            # String.new(length), then one String.appendChar per character
            string_val = self.tokenizer.string_val()
            self.writer.write_push(CONSTANT, len(string_val))
            self.writer.write_call("String.new", 1)
            for c in string_val:
                self.writer.write_push(CONSTANT, ord(c))
                self.writer.write_call("String.appendChar", 2)
            self.next()

        elif token_type is JTok.KEYWORD:
            keyword = self.tokenizer.key_word()
            if keyword in {"null", "false"}:
                self.writer.write_push(CONSTANT, 0)
            elif keyword == "true":
                # true is -1: push 1, then negate
                self.writer.write_push(CONSTANT, 1)
                self.writer.write_arithmetic("neg")
            elif keyword == "this":
                self.writer.write_push(POINTER, 0)
            else:
                print("unexpected")

            self.next()

        elif token_type is JTok.IDENTIFIER:
            name = self.tokenizer.identifier()

            # The token after the identifier decides which form this is
            self.next()

            if self._current_symbol_is({".", "("}):
                self.compile_subroutineCall(caller, name)

            elif self._current_symbol_is({'['}):
                self.next()  # skip '['

                self.compile_expression(SubElement(caller, EXPRESSION))
                # index + base address, then read the entry through that 0
                self._push_variable(name)
                self.writer.write_arithmetic("add")
                self.writer.write_pop(POINTER, 1)
                self.writer.write_push("that", 0)
                self.next()  # skip ']'

            else:
                self._push_variable(name)

        elif token_type is JTok.SYMBOL:
            if self.tokenizer.symbol() == '(':
                self.next()  # skip '('

                self.compile_expression(SubElement(caller, EXPRESSION))
                self.next()  # skip ')'

            elif self.tokenizer.symbol() in {'-', '~'}:
                unary_op = self.tokenizer.symbol()
                self.next()
                self.compile_term(SubElement(caller, TERM))
                # the operator applies to the operand that was just pushed
                if unary_op == "-":
                    self.writer.write_arithmetic("neg")
                else:
                    self.writer.write_arithmetic("not")

    def compile_do(self, caller):
        """Compiles 'do' subroutineCall ';' and discards the returned value."""
        self.next()  # skip 'do'

        name = self.tokenizer.identifier()
        self.next()

        self.compile_subroutineCall(caller, name)
        # drop the returned value
        self.writer.write_pop(TEMP, 0)
        self.next()  # skip ';'

    def compile_let(self, caller):
        """Compiles 'let' varName ('[' expression ']')? '=' expression ';'."""
        self.next()  # skip 'let'

        varName = self.tokenizer.identifier()
        self.next()

        kind = self.symbols.kind_of(varName)
        kind = kind.get_segment()
        index = self.symbols.index_of(varName)

        if self.tokenizer.symbol() == '[':
            self.next()  # skip '['

            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_push(kind, index)
            self.writer.write_arithmetic("add")
            self.next()  # skip ']'
            self.next()  # skip '='
            self.compile_expression(SubElement(caller, EXPRESSION))
            # Park the value in temp 0 so that pointer 1 is set only after
            # the right-hand side has been evaluated
            self.writer.write_pop(TEMP, 0)
            self.writer.write_pop(POINTER, 1)
            self.writer.write_push(TEMP, 0)
            self.writer.write_pop("that", 0)

        else:
            self.next()  # skip '='

            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_pop(kind, index)

        self.next()  # skip ';'

    def compile_return(self, caller):
        """Compiles 'return' expression? ';'; a bare return pushes the constant 0 first."""
        self.next()  # skip 'return'

        if self._current_symbol_is({";"}):
            self.writer.write_push(CONSTANT, 0)
        else:
            self.compile_expression(SubElement(caller, EXPRESSION))
        self.writer.write_return()
        self.next()  # skip ';'

    def compile_while(self, caller):
        """Compiles 'while' '(' expression ')' '{' statements '}'."""
        # Take the label index first: nested loops in the body take the
        # following ones
        while_index = self.while_counter
        self.while_counter += 1
        self.writer.write_label("WHILE_EXP" + str(while_index))
        self.next()  # skip 'while'

        self.next()  # skip '('

        self.compile_expression(SubElement(caller, EXPRESSION))
        # leave the loop when the condition is false
        self.writer.write_arithmetic("not")
        self.writer.write_if("WHILE_END" + str(while_index))

        self.next()  # skip ')'

        self.next()  # skip '{'

        self.compile_statements(SubElement(caller, STATEMENTS))

        self.writer.write_goto("WHILE_EXP" + str(while_index))
        self.writer.write_label("WHILE_END" + str(while_index))
        self.next()  # skip '}'

    def compile_statements(self, caller):
        """Compiles statements for as long as the current token is a statement keyword."""
        handlers = {
            'do': (self.compile_do, 'doStatement'),
            'while': (self.compile_while, 'whileStatement'),
            'let': (self.compile_let, 'letStatement'),
            'return': (self.compile_return, 'returnStatement'),
            'if': (self.compile_if, 'ifStatement'),
        }
        caller.text = " "
        while (self.tokenizer.token_type() is JTok.KEYWORD
               and self.tokenizer.key_word() in handlers):
            compile_statement, tag = handlers[self.tokenizer.key_word()]
            compile_statement(SubElement(caller, tag))

    def compile_if(self, caller):
        """
        Compiles 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?

        The parenthesised condition is compiled as an expression whose single
        term is '(' expression ')', which also consumes the closing ')'.
        """
        self.next()  # skip 'if', now on '('
        self.compile_expression(caller)
        self.next()  # skip '{'

        # Take the label index first: nested ifs in the body take the
        # following ones
        if_index = self.if_counter
        self.if_counter += 1
        self.writer.write_if("IF_TRUE" + str(if_index))

        self.writer.write_goto("IF_FALSE" + str(if_index))
        self.writer.write_label("IF_TRUE" + str(if_index))

        self.compile_statements(caller)

        self.next()  # skip '}'

        if (self.tokenizer.token_type() is JTok.KEYWORD
                and self.tokenizer.key_word() == 'else'):
            self.writer.write_goto("IF_END" + str(if_index))
            self.writer.write_label("IF_FALSE" + str(if_index))

            self.next()  # skip 'else'
            self.next()  # skip '{'
            self.compile_statements(caller)
            self.next()  # skip '}'
            self.writer.write_label("IF_END" + str(if_index))
        else:
            self.writer.write_label("IF_FALSE" + str(if_index))

        return
class CompilationEngine():
    """
    Compiles a Jack class read from a jack tokenizer and emits VM commands
    through a VMWriter created for output_file.
    NOTE: ASSUMES ERROR FREE CODE -> a todo could be to add error handling
    """
    # symbol kind recorded for names declared in a parameter list / var declaration
    SYMBOL_KINDS = {'parameter_list': 'argument', 'var_dec': 'local'}
    # tokens that open a class-level variable declaration
    CLASS_VAR_DEC_TOKENS = ["static", "field"]
    # tokens that open a subroutine declaration
    SUBROUTINE_TOKENS = ["function", "method", "constructor"]
    # tokens that open a statement
    STATEMENT_TOKENS = ['do', 'let', 'while', 'return', 'if']
    # construct name -> tokens that mark its start (looked up by _starting_token_for)
    STARTING_TOKENS = {
        'var_dec': ['var'],
        'parameter_list': ['('],
        'subroutine_body': ['{'],
        'expression_list': ['('],
        'expression': ['=', '[', '('],
        'array': ['['],
        'conditional': ['if', 'else']
    }
    # construct name -> tokens that mark its end (looked up by _not_terminal_token_for)
    TERMINATING_TOKENS = {
        'class': ['}'],
        'class_var_dec': [';'],
        'subroutine': ['}'],
        'parameter_list': [')'],
        'expression_list': [')'],
        'statements': ['}'],
        'do': [';'],
        'let': [';'],
        'while': ['}'],
        'if': ['}'],
        'var_dec': [';'],
        'return': [';'],
        'expression': [';', ')', ']', ','],
        'array': [']']
    }
    # binary operator tokens recognised inside expressions
    OPERATORS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    # unary operator tokens recognised inside expressions
    UNARY_OPERATORS = ['-', '~']
    # statement kinds that get their own label counter (passed to LabelCounter)
    TOKENS_THAT_NEED_LABELS = ['if', 'while']

    def __init__(self, tokenizer, output_file):
        """
        Wire up the collaborators used during compilation.

        :param tokenizer: token source for the class being compiled
        :param output_file: destination handed to the VMWriter
        """
        # filled in by compile_class once the class name is known
        self.class_name = None
        self.tokenizer, self.output_file = tokenizer, output_file
        # one table for class-level symbols, one reset per subroutine
        self.class_symbol_table = SymbolTable()
        self.subroutine_symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)
        self.label_counter = LabelCounter(labels=self.TOKENS_THAT_NEED_LABELS)

    def compile_class(self):
        """
        everything needed to compile a class, the basic unit of compilation
        """
        # skip everything up to class start
        while not self.tokenizer.class_token_reached():
            self.tokenizer.advance()
        # since compilation unit is a class makes sense to store this as instance variable
        self.class_name = self.tokenizer.next_token

        while self.tokenizer.has_more_tokens:
            self.tokenizer.advance()

            if self.tokenizer.current_token in self.CLASS_VAR_DEC_TOKENS:
                self.compile_class_var_dec()
            elif self.tokenizer.current_token in self.SUBROUTINE_TOKENS:
                self.compile_subroutine()

    def compile_class_var_dec(self):
        """
        example: field int x;
        """
        symbol_kind = self.tokenizer.keyword()

        # get symbol type
        self.tokenizer.advance()
        symbol_type = self.tokenizer.keyword()

        # get all identifiers
        while self._not_terminal_token_for('class_var_dec'):
            self.tokenizer.advance()

            if self.tokenizer.identifier():
                # add symbol to class
                symbol_name = self.tokenizer.identifier()
                self.class_symbol_table.define(name=symbol_name,
                                               kind=symbol_kind,
                                               symbol_type=symbol_type)

    def compile_subroutine(self):
        """
        example: methoid void dispose() { ...
        """
        # new subroutine means new subroutine scope
        self.subroutine_symbol_table.reset()

        # get subroutine name
        self.tokenizer.advance()
        self.tokenizer.advance()
        subroutine_name = self.tokenizer.current_token

        # compile parameter list
        self.tokenizer.advance()
        self.compile_parameter_list()

        # compile body
        self.tokenizer.advance()
        self.compile_subroutine_body(subroutine_name=subroutine_name)

        # rest counts from subroutine
        self.label_counter.reset_counts()

    def compile_subroutine_body(self, subroutine_name):
        # skip start
        self.tokenizer.advance()
        # get all locals
        num_locals = 0
        while self._starting_token_for('var_dec'):
            num_locals += self.compile_var_dec()
            self.tokenizer.advance()

        # write function command
        self.vm_writer.write_function(name='{}.{}'.format(
            self.class_name, subroutine_name),
                                      num_locals=num_locals)

        # compile all statements
        while self._not_terminal_token_for('subroutine'):
            self.compile_statements()

    def compile_parameter_list(self):
        """
        example: dispose(int a, int b)
        returns number of params found
        """
        ### symbol table
        while self._not_terminal_token_for('parameter_list'):
            self.tokenizer.advance()

            # symbol table
            if self.tokenizer.token_type_of(
                    self.tokenizer.next_token) == "IDENTIFIER":
                symbol_kind = self.SYMBOL_KINDS['parameter_list']
                symbol_type = self.tokenizer.current_token
                symbol_name = self.tokenizer.next_token
                self.subroutine_symbol_table.define(name=symbol_name,
                                                    kind=symbol_kind,
                                                    symbol_type=symbol_type)

    # 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        """
        example: var int a;
        """
        # skip var
        self.tokenizer.advance()
        # get symbol type
        symbol_type = self.tokenizer.current_token
        # count number of vars, i.e., var int i, sum = 2
        num_vars = 0

        # get all vars
        while self._not_terminal_token_for('var_dec'):
            self.tokenizer.advance()

            if self.tokenizer.identifier():
                num_vars += 1
                symbol_kind = self.SYMBOL_KINDS['var_dec']
                symbol_name = self.tokenizer.identifier()
                self.subroutine_symbol_table.define(name=symbol_name,
                                                    kind=symbol_kind,
                                                    symbol_type=symbol_type)
        # return vars processed
        return num_vars

    def compile_statements(self):
        """
        call correct statement
        """
        # TODO: way to make this global for class?
        statement_compile_methods = {
            'if': self.compile_if,
            'do': self.compile_do,
            'let': self.compile_let,
            'while': self.compile_while,
            'return': self.compile_return
        }

        while self._not_terminal_token_for('subroutine'):
            if self.tokenizer.current_token in self.STATEMENT_TOKENS:
                statement_type = self.tokenizer.current_token
                statement_compile_methods[statement_type]()

            self.tokenizer.advance()

    def compile_do(self):
        """
        example: do square.dispose();
        """
        # get to caller
        self.tokenizer.advance()
        # set caller_name
        caller_name = self.tokenizer.current_token
        # look up in symbol table
        symbol = self._find_symbol_in_symbol_tables(symbol_name=caller_name)
        # skip .
        self.tokenizer.advance()
        # subroutine name
        self.tokenizer.advance()
        # set subroutine name
        subroutine_name = self.tokenizer.current_token

        if symbol:  # user defined Method
            # push value onto local segment
            segment = 'local'
            index = symbol['index']
            symbol_type = symbol['type']
            self.vm_writer.write_push(segment=segment, index=index)
        else:  # i.e, OS call
            symbol_type = caller_name

        subroutine_call_name = symbol_type + '.' + subroutine_name
        # start expression list
        self.tokenizer.advance()
        # get arguments in expession list
        num_args = self.compile_expression_list()
        # method call
        if symbol:
            # calling object passed as implicit argument
            num_args += 1
        # write call
        self.vm_writer.write_call(name=subroutine_call_name, num_args=num_args)
        # pop off return of previous call we don't care about
        self.vm_writer.write_pop(segment='temp', index='0')

    # 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        """
        example: let direction = 0;
        """
        # get symbol to store expression evaluation
        self.tokenizer.advance()
        symbol_name = self.tokenizer.current_token
        symbol = self._find_symbol_in_symbol_tables(symbol_name=symbol_name)

        # array assignment?
        array_assignment = self._starting_token_for(keyword_token='array',
                                                    position='next')
        if array_assignment:
            # get to index expression
            self.tokenizer.advance()
            self.tokenizer.advance()
            # compile it
            self.compile_expression()
            self.vm_writer.write_push(segment=symbol['kind'],
                                      index=symbol['index'])
            # add two addresses
            self.vm_writer.write_arithmetic(command='+')

        # go past =
        while not self.tokenizer.current_token == '=':
            self.tokenizer.advance()
        # compile all expressions
        while self._not_terminal_token_for('let'):
            self.tokenizer.advance()
            self.compile_expression()

        if not array_assignment:
            # store expression evaluation in symbol location
            self.vm_writer.write_pop(segment=symbol['kind'],
                                     index=symbol['index'])
        else:  # array unloading
            # pop return value onto temp
            self.vm_writer.write_pop(segment='temp', index='0')
            # pop address of array slot onto THAT
            self.vm_writer.write_pop(segment='pointer',
                                     index='1')  # pointer 1 => array
            # push value on temp back onto stack
            self.vm_writer.write_push(segment='temp', index='0')
            # set that
            self.vm_writer.write_pop(segment='that', index='0')

    # 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        """
        example: while (x > 0) { ... }
        """
        # write while label
        self.vm_writer.write_label(
            label='WHILE_EXP{}'.format(self.label_counter.get('while')))

        # advance to expression start (
        self.tokenizer.advance()
        self.tokenizer.advance()

        # compile expression in ()
        self.compile_expression()

        # NOT expression so for easily handling of termination and if-goto
        self.vm_writer.write_unary(command='~')
        self.vm_writer.write_ifgoto(
            label='WHILE_END{}'.format(self.label_counter.get('while')))

        while self._not_terminal_token_for('while'):
            self.tokenizer.advance()

            if self._statement_token():
                self.compile_statements()

        # write goto
        self.vm_writer.write_goto(
            label='WHILE_EXP{}'.format(self.label_counter.get('while')))
        # write end label
        self.vm_writer.write_label(
            label='WHILE_END{}'.format(self.label_counter.get('while')))

        # add while to labels count
        self.label_counter.increment('while')

    def compile_if(self):
        """
        example: if (True) { ... } else { ... }
        """
        # advance to expression start
        self.tokenizer.advance()
        self.tokenizer.advance()
        # compile expression in ()
        self.compile_expression()
        # write ifgoto to if statement
        self.vm_writer.write_ifgoto(
            label='IF_TRUE{}'.format(self.label_counter.get('if')))
        # write goto if false (else)
        self.vm_writer.write_goto(
            label='IF_FALSE{}'.format(self.label_counter.get('if')))
        # write if label
        self.vm_writer.write_label(
            label='IF_TRUE{}'.format(self.label_counter.get('if')))
        # body of if
        self.compile_conditional_body()
        # else?
        if self._starting_token_for(keyword_token='conditional',
                                    position='next'):
            # past closing {
            self.tokenizer.advance()
            # goto if end if this path wasn't hit
            self.vm_writer.write_goto(
                label='IF_END{}'.format(self.label_counter.get('if')))
            # if false
            self.vm_writer.write_label(
                label='IF_FALSE{}'.format(self.label_counter.get('if')))
            # compile else
            self.compile_conditional_body()
            # define IF_END
            self.vm_writer.write_label(
                label='IF_END{}'.format(self.label_counter.get('if')))
        else:  # no else present
            # go to end of if
            self.vm_writer.write_label(
                label='IF_FALSE{}'.format(self.label_counter.get('if')))

    def compile_conditional_body(self):
        while self._not_terminal_token_for('if'):
            self.tokenizer.advance()

            if self._statement_token():
                if self.tokenizer.current_token == 'if':
                    # add ifto labels count
                    self.label_counter.increment('if')
                    # compile nested if
                    self.compile_statements()
                    # subtract for exiting nesting
                    self.label_counter.decrement('if')
                else:
                    self.compile_statements()

    # term (op term)*
    def compile_expression(self):
        """
        Compile the tokens of one expression, from the current token up to
        (not including) the next expression terminator (';', ')', ']', ',').

        Operands are pushed as they are met; operators are only collected
        and written once the whole expression has been consumed.
        example: x = 4
        """
        # operators are inserted at the front, so they are written in the
        # reverse of the order in which they appeared
        # NOTE(review): for a chain such as a - b - c this evaluates b - c
        # first - confirm that right-to-left grouping is intended
        ops = []

        while self._not_terminal_token_for('expression'):
            # the order of the checks matters: calls and array accesses also
            # start with an identifier, so they are tested before plain names
            if self._subroutine_call():
                self.compile_subroutine_call()
            elif self._array_expression():
                self.compile_array_expression()
            elif self.tokenizer.current_token.isdigit():
                self.vm_writer.write_push(segment='constant',
                                          index=self.tokenizer.current_token)
            elif self.tokenizer.identifier():
                self.compile_symbol_push()
            # '-' is listed both as a binary and as a unary operator; the
            # _part_of_expression_list check decides which one it is here
            elif self.tokenizer.current_token in self.OPERATORS and not self._part_of_expression_list(
            ):
                ops.insert(
                    0,
                    Operator(token=self.tokenizer.current_token,
                             category='bi'))
            elif self.tokenizer.current_token in self.UNARY_OPERATORS:
                ops.insert(
                    0,
                    Operator(token=self.tokenizer.current_token,
                             category='unary'))
            elif self.tokenizer.string_const():
                self.compile_string_const()
            elif self.tokenizer.boolean():  # boolean case
                self.compile_boolean()
            elif self._starting_token_for('expression'):  # nested expression
                # skip the starting token ('=', '[' or '(')
                self.tokenizer.advance()
                self.compile_expression()
            elif self.tokenizer.null():
                self.vm_writer.write_push(segment='constant', index=0)

            # step off the token (or the terminator a nested routine stopped on)
            self.tokenizer.advance()

        # write the collected operators
        for op in ops:
            self.compile_op(op)

    def compile_op(self, op):
        """
        example: +, /, etc.
        """
        if op.unary():
            self.vm_writer.write_unary(command=op.token)
        elif op.multiplication():
            self.vm_writer.write_call(name='Math.multiply', num_args=2)
        elif op.division():
            self.vm_writer.write_call(name='Math.divide', num_args=2)
        else:
            self.vm_writer.write_arithmetic(command=op.token)

    def compile_boolean(self):
        """
        'true' and 'false'
        """
        self.vm_writer.write_push(segment='constant', index=0)

        if self.tokenizer.boolean() == 'true':
            # negate true
            self.vm_writer.write_unary(command='~')

    def compile_string_const(self):
        """
        example: "Hello World"
        """
        # handle string const
        string_length = len(self.tokenizer.string_const())
        self.vm_writer.write_push(segment='constant', index=string_length)
        self.vm_writer.write_call(name='String.new', num_args=1)
        # build string from chars
        for char in self.tokenizer.string_const():
            if not char == self.tokenizer.STRING_CONST_DELIMITER:
                ascii_value_of_char = ord(char)
                self.vm_writer.write_push(segment='constant',
                                          index=ascii_value_of_char)
                self.vm_writer.write_call(name='String.appendChar', num_args=2)

    def compile_symbol_push(self):
        """
        example: x
        """
        symbol = self._find_symbol_in_symbol_tables(
            symbol_name=self.tokenizer.identifier())
        segment = symbol['kind']
        index = symbol['index']
        self.vm_writer.write_push(segment=segment, index=index)

    def compile_array_expression(self):
        """
        example: let x = a[j], a[4]
        """
        symbol_name = self.tokenizer.current_token
        symbol = self._find_symbol_in_symbol_tables(symbol_name=symbol_name)
        # get to index expression
        self.tokenizer.advance()
        self.tokenizer.advance()
        # compile
        self.compile_expression()
        # push onto local array symbol
        self.vm_writer.write_push(segment='local', index=symbol['index'])
        # add two addresses: identifer and expression result
        self.vm_writer.write_arithmetic(command='+')
        # pop address onto pointer 1 / THAT
        self.vm_writer.write_pop(segment='pointer', index=1)
        # push value onto stack
        self.vm_writer.write_push(segment='that', index=0)

    def compile_subroutine_call(self):
        """
        example: Memory.peek(8000)
        """
        subroutine_name = ''

        while not self._starting_token_for('expression_list'):
            subroutine_name += self.tokenizer.current_token
            self.tokenizer.advance()
        # get num of args
        num_args = self.compile_expression_list()
        # write_call after pushing arguments onto stack
        self.vm_writer.write_call(name=subroutine_name, num_args=num_args)

    # (expression (',' expression)* )?
    def compile_expression_list(self):
        """
        separeted out of compile_expression because of edge cases from normal expression
        example: (x, y, x + 5)
        """
        num_args = 0

        if self._empty_expression_list():
            return num_args

        # start expressions
        self.tokenizer.advance()

        while self._not_terminal_token_for('expression_list'):
            num_args += 1
            self.compile_expression()
            if self._another_expression_coming(
            ):  # would be , after compile expression
                self.tokenizer.advance()
        return num_args

    def compile_return(self):
        """
        example: return x; or return;
        """
        if self._not_terminal_token_for(keyword_token='return',
                                        position='next'):
            self.compile_expression()
        else:  # push constant for void
            self.vm_writer.write_push(segment='constant', index='0')
            self.tokenizer.advance()

        self.vm_writer.write_return()

    def _not_terminal_token_for(self, keyword_token, position='current'):
        if position == 'current':
            return not self.tokenizer.current_token in self.TERMINATING_TOKENS[
                keyword_token]
        elif position == 'next':
            return not self.tokenizer.next_token in self.TERMINATING_TOKENS[
                keyword_token]

    def _starting_token_for(self, keyword_token, position='current'):
        if position == 'current':
            return self.tokenizer.current_token in self.STARTING_TOKENS[
                keyword_token]
        elif position == 'next':
            return self.tokenizer.next_token in self.STARTING_TOKENS[
                keyword_token]

    def _statement_token(self):
        return self.tokenizer.current_token in self.STATEMENT_TOKENS

    def _operator_token(self, position='current'):
        if position == 'current':
            return self.tokenizer.current_token in self.OPERATORS
        elif position == 'next':
            return self.tokenizer.next_token in self.OPERATORS

    def _another_expression_coming(self):
        return self.tokenizer.current_token == ","

    def _find_symbol_in_symbol_tables(self, symbol_name):
        if self.subroutine_symbol_table.find_symbol_by_name(symbol_name):
            return self.subroutine_symbol_table.find_symbol_by_name(
                symbol_name)
        elif self.class_symbol_table.find_symbol_by_name(symbol_name):
            return self.class_symbol_table.find_symbol_by_name(symbol_name)

    def _empty_expression_list(self):
        return self._start_of_expression_list(
        ) and self._next_ends_expression_list()

    def _start_of_expression_list(self):
        return self.tokenizer.current_token in self.STARTING_TOKENS[
            'expression_list']

    def _next_ends_expression_list(self):
        return self.tokenizer.next_token in self.TERMINATING_TOKENS[
            'expression_list']

    def _subroutine_call(self):
        return self.tokenizer.identifier() and self.tokenizer.next_token == '.'

    def _array_expression(self):
        return self.tokenizer.identifier() and self._starting_token_for(
            keyword_token='array', position='next')

    def _part_of_expression_list(self):
        return self.tokenizer.tokens_found[-3] in [
            ',', '('
        ]  # distinguish neg from sub
Esempio n. 21
0
class CompilationEngine:
    def __init__(self, source):
        """
        Compile the class found in ``source`` as part of construction.

        Builds the tokenizer, symbol table and VM writer, compiles the class
        and closes the writer again.
        :param source: handed to both the Tokenizer and the VMWriter
        """
        # counters used to number the labels of if / while statements
        self.if_counter = self.while_counter = 0
        self.class_name = ""
        self.arithmetic_op = {}

        self.tokenizer = Tokenizer(source)
        # position the tokenizer on the first token
        self.tokenizer.has_more_tokens()
        self.tokenizer.advance()

        self.symbols = SymbolTable()
        self.writer = VMWriter(source)
        self.init_op()

        self.root = Element(CLASS)
        self.compile_class(self.root)
        self.writer.close()

    def init_op(self):
        """
        Fill ``arithmetic_op`` with the VM command for every binary operator
        symbol.
        :return:
        """
        symbols = ('+', '-', '*', '/', '&', '|', '<', '>', '=')
        commands = (
            "add",
            "sub",
            "call Math.multiply 2",
            "call Math.divide 2",
            "and",
            "or",
            "lt",
            "gt",
            "eq",
        )
        self.arithmetic_op = dict(zip(symbols, commands))

    def next(self):
        """
        Step the tokenizer forward by one token; does nothing once the
        tokenizer reports that no tokens are left.
        :return:
        """
        if not self.tokenizer.has_more_tokens():
            return
        self.tokenizer.advance()

    def compile_expression(self, caller):
        """
        Compiles an expression: term (op term)*

        Every operator is written right after its right-hand term, so a chain
        is evaluated left to right (a - b - c becomes (a - b) - c). Collecting
        the operators and writing them in reverse once all terms were pushed
        would instead compute a - (b - c).
        :param caller: parent element; every term gets a child element
        :return:
        """
        self.compile_term(SubElement(caller, TERM))
        while (self.tokenizer.token_type() is JTok.SYMBOL
               and self.tokenizer.symbol() in OPERATORS):
            operator = self.tokenizer.symbol()
            self.next()
            self.compile_term(SubElement(caller, TERM))
            # both operands are on the stack now: apply the operator
            self.writer.write_arithmetic(self.arithmetic_op[operator])

    def compile_expressionList(self, caller):
        """
        Compile the comma separated expressions of an argument list.

        :param caller: parent element; every expression gets a child element
        :return: number of expressions in the list (0 when the current token
            is already the closing ')'); used by the subroutine call
        """
        at_closing_paren = (self.tokenizer.token_type() is JTok.SYMBOL
                            and self.tokenizer.symbol() == ")")
        if at_closing_paren:
            # empty list: nothing to compile
            caller.text = " "
            return 0

        self.compile_expression(SubElement(caller, EXPRESSION))
        count = 1
        while (self.tokenizer.token_type() is JTok.SYMBOL
               and self.tokenizer.symbol() == ","):
            self.next()  # step over ','
            self.compile_expression(SubElement(caller, EXPRESSION))
            count += 1
        return count

    def compile_subroutineCall(self, caller, first_token):
        """
        Compile a subroutine call.

        The first identifier has to be passed in by hand: on entry the
        current token is expected to be the one that follows it ('.' or '(').
        Three shapes are handled:
          * var.name(...)   - var is a known variable: it is pushed as the
                              implicit first argument and its type is used
                              as the class name
          * Class.name(...) - called as written, no implicit argument
          * name(...)       - call on the current object: pointer 0 is
                              pushed as the implicit first argument
        :param caller: parent element for the expression list
        :param first_token: the identifier the call starts with
        :return:
        """
        target = first_token
        implicit_args = 0

        if self.tokenizer.symbol() != '.':
            # bare name: subroutine of this class, invoked on this object
            target = self.class_name + "." + target
            self.writer.write_push(POINTER, 0)
            implicit_args = 1
        else:
            self.next()  # step over '.'
            var_kind = self.symbols.kind_of(target)
            if var_kind:  # the first token names a variable
                self.writer.write_push(Kind.get_segment(var_kind),
                                       self.symbols.index_of(target))
                target = self.symbols.type_of(target)
                implicit_args = 1

            target = target + "." + self.tokenizer.identifier()
            self.next()

        self.next()  # step over '('
        explicit_args = self.compile_expressionList(
            SubElement(caller, EXPRESSION_LIST))

        self.writer.write_call(target, explicit_args + implicit_args)
        self.next()  # step over ')'

    def compile_term(self, caller):
        """
        Compile a single term and leave its value on the stack.

        Handled forms: integer and string constants, the keyword constants
        null / false / true / this, variables, array accesses, subroutine
        calls, parenthesised expressions and unary '-' / '~'.
        :param caller: parent element for nested expressions and terms
        :return:
        """
        token_type = self.tokenizer.token_type()
        if token_type is JTok.INT_CONST:
            self.writer.write_push(CONSTANT, self.tokenizer.intVal())
            self.next()

        elif token_type is JTok.STRING_CONST:
            # build the string at run time, one character at a time
            string_val = self.tokenizer.string_val()
            self.writer.write_push(CONSTANT, len(string_val))
            self.writer.write_call("String.new", 1)
            for c in string_val:
                self.writer.write_push(CONSTANT, ord(c))
                self.writer.write_call("String.appendChar", 2)
            self.next()

        elif token_type is JTok.KEYWORD:
            keyword = self.tokenizer.key_word()
            if keyword in {"null", "false"}:
                self.writer.write_push(CONSTANT, 0)
            elif keyword == "true":
                # true is written as -1
                self.writer.write_push(CONSTANT, 1)
                self.writer.write_arithmetic("neg")
            elif keyword == "this":
                self.writer.write_push(POINTER, 0)
            else:
                print("unexpected")

            self.next()

        elif token_type is JTok.IDENTIFIER:
            name = self.tokenizer.identifier()

            # the token after the identifier tells the forms apart
            self.next()
            token_type = self.tokenizer.token_type()

            if (token_type is JTok.SYMBOL
                    and self.tokenizer.symbol() in {".", "("}):
                self.compile_subroutineCall(caller, name)

            elif token_type is JTok.SYMBOL and self.tokenizer.symbol() == '[':
                self.next()  # step over '['

                self.compile_expression(SubElement(caller, EXPRESSION))
                kind = self.symbols.kind_of(name)
                index = self.symbols.index_of(name)
                if kind is not None:
                    self.writer.write_push(kind.get_segment(), index)
                else:
                    print("unexpected")
                # index + base, then read the element through 'that 0'
                self.writer.write_arithmetic("add")
                self.writer.write_pop(POINTER, 1)
                self.writer.write_push("that", 0)

                self.next()  # step over ']'

            else:
                # plain variable
                kind = self.symbols.kind_of(name)
                index = self.symbols.index_of(name)
                if kind is not None:
                    self.writer.write_push(kind.get_segment(), index)
                else:
                    print("unexpected")

        elif token_type is JTok.SYMBOL:
            if self.tokenizer.symbol() == '(':
                self.next()  # step over '('

                self.compile_expression(SubElement(caller, EXPRESSION))
                self.next()  # step over ')'

            elif self.tokenizer.symbol() in {'-', '~'}:
                unary_op = self.tokenizer.symbol()
                self.next()
                self.compile_term(SubElement(caller, TERM))
                # the guard above leaves only '-' and '~' as possibilities
                self.writer.write_arithmetic(
                    "neg" if unary_op == "-" else "not")

    def compile_do(self, caller):
        """
        Compile a do statement.

        format : 'do' subroutineCall ';'
        The value left on the stack by the call is discarded into temp 0.
        :param caller: parent element handed on to the subroutine call
        :return:
        """
        self.next()  # step over 'do'

        # compile_subroutineCall expects the first identifier as an argument
        callee = self.tokenizer.identifier()
        self.next()
        self.compile_subroutineCall(caller, callee)

        self.writer.write_pop(TEMP, 0)
        self.next()  # step over ';'

    def compile_let(self, caller):
        """
        Compile a let statement.

        format : 'let' varName ( '[' expression ']' )? '=' expression ';'
        For an array target the element address (index + base) is computed
        first; the assigned value is parked in temp 0 while that address is
        moved into pointer 1.
        :param caller: parent element; every expression gets a child element
        :return:
        """
        self.next()  # step over 'let'

        target = self.tokenizer.identifier()
        self.next()

        segment = self.symbols.kind_of(target).get_segment()
        slot = self.symbols.index_of(target)

        if self.tokenizer.symbol() != '[':
            # plain variable
            self.next()  # step over '='

            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_pop(segment, slot)
        else:
            self.next()  # step over '['

            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_push(segment, slot)
            self.writer.write_arithmetic("add")
            self.next()  # step over ']'
            self.next()  # step over '='
            self.compile_expression(SubElement(caller, EXPRESSION))
            # park the value, aim 'that' at the element, store the value
            self.writer.write_pop(TEMP, 0)
            self.writer.write_pop(POINTER, 1)
            self.writer.write_push(TEMP, 0)
            self.writer.write_pop("that", 0)

        self.next()  # step over ';'

    def compile_return(self, caller):
        """
        Compile a return statement.

        format : 'return' expression? ';'
        A bare return pushes constant 0 so that a value is always returned.
        :param caller: parent element for the returned expression
        :return:
        """
        self.next()  # step over 'return'

        returns_nothing = (self.tokenizer.token_type() is JTok.SYMBOL
                           and self.tokenizer.symbol() == ";")
        if returns_nothing:
            self.writer.write_push(CONSTANT, 0)
        else:
            self.compile_expression(SubElement(caller, EXPRESSION))

        self.writer.write_return()
        self.next()  # step over ';'

    def compile_while(self, caller):
        """
        Compile a while loop.

        Emitted layout: loop-start label, condition, "not", if-goto to the
        end label, body, goto loop-start, end label.  Both labels carry the
        value of ``self.while_counter`` read on entry.

        format : 'while' '(' expression ')' '{' statements '}'
        :param caller: XML element under which the condition and body
            elements are created
        :return: None
        """
        loop_id = self.while_counter
        self.while_counter = loop_id + 1
        start_label = "WHILE_EXP" + str(loop_id)
        end_label = "WHILE_END" + str(loop_id)

        self.writer.write_label(start_label)
        self.next()  # step over 'while'
        self.next()  # step over '('

        # Leave the loop when the negated condition holds.
        self.compile_expression(SubElement(caller, EXPRESSION))
        self.writer.write_arithmetic("not")
        self.writer.write_if(end_label)

        self.next()  # step over ')'
        self.next()  # step over '{'

        self.compile_statements(SubElement(caller, STATEMENTS))

        self.writer.write_goto(start_label)
        self.writer.write_label(end_label)
        self.next()  # step over '}'

    def compile_statements(self, caller):
        """
        Compile consecutive statements for as long as the current token is
        one of the keywords do / while / let / return / if.

        :param caller: XML element that receives one child element per
            compiled statement
        :return: None
        """
        # keyword -> (compile method, tag of the child element it gets)
        handlers = {
            'do': (self.compile_do, 'doStatement'),
            'while': (self.compile_while, 'whileStatement'),
            'let': (self.compile_let, 'letStatement'),
            'return': (self.compile_return, 'returnStatement'),
            'if': (self.compile_if, 'ifStatement'),
        }
        caller.text = " "
        while self.tokenizer.token_type() is JTok.KEYWORD:
            entry = handlers.get(self.tokenizer.key_word())
            if entry is None:
                # a keyword that does not open a statement ends the run
                break
            compile_one, tag = entry
            compile_one(SubElement(caller, tag))

    def compile_if(self, caller):
        """
        Compile an if statement with an optional else clause.

        format : 'if' '(' expression ')' '{' statements '}'
        ( 'else' '{' statements '}' )?
        :param caller: XML element handed on to the expression and
            statement compilers
        :return: None
        """
        # NOTE(review): the expression compiler is started on the token that
        # follows 'if', so the parenthesised condition is presumably consumed
        # as one '(' expression ')' term -- confirm against compile_expression.
        self.next()  # (
        self.compile_expression(caller)
        self.next()  # {

        branch_id = self.if_counter
        self.if_counter = branch_id + 1
        true_label = "IF_TRUE" + str(branch_id)
        false_label = "IF_FALSE" + str(branch_id)

        self.writer.write_if(true_label)
        self.writer.write_goto(false_label)
        self.writer.write_label(true_label)

        self.compile_statements(caller)
        self.next()

        if self.tokenizer.key_word() != 'else':
            # No else clause: the false label simply follows the body.
            self.writer.write_label(false_label)
            return

        end_label = "IF_END" + str(branch_id)
        # The true branch jumps over the else body.
        self.writer.write_goto(end_label)
        self.writer.write_label(false_label)

        self.next()  # else
        self.next()  # {
        self.compile_statements(caller)
        self.next()  # }

        self.writer.write_label(end_label)

    def compile_var_dec(self, caller):
        """
        Compile a local variable declaration.

        format: 'var' type varName ( ',' varName)* ';'
        :param caller: XML element handed on to compile_list_of_vars
        :return: whatever compile_list_of_vars returns (the number of
            variables it defined)
        """
        # The current keyword doubles as the member name looked up in Kind.
        keyword = self.tokenizer.key_word()
        self.next()

        declared = self.compile_list_of_vars(caller, "var", Kind[keyword])
        return declared

    def compile_class(self, caller):
        """
        Compile a whole class: the header tokens, then class variable
        declarations, then subroutines, and finally the closing symbol.

        :param caller: XML element that receives the class-level children
        :return: None
        """
        class_keyword = self.tokenizer.key_word()
        SubElement(caller, KEYWORD).text = class_keyword
        self.next()

        # The class name is remembered so that subroutine names can be
        # qualified with it later.
        self.class_name = self.tokenizer.identifier()
        SubElement(caller, IDENTIFIER).text = self.class_name
        self.next()

        opening = self.tokenizer.symbol()  # {
        SubElement(caller, SYMBOL).text = opening
        self.next()

        class_var_keywords = {'static', 'field'}
        while (self.tokenizer.token_type() is JTok.KEYWORD
               and self.tokenizer.key_word() in class_var_keywords):
            self.compile_classVarDec(SubElement(caller, "classVarDec"))

        # Everything up to the next symbol token is treated as a subroutine.
        while self.tokenizer.token_type() is not JTok.SYMBOL:
            self.compile_subroutine(SubElement(caller, "subroutineDec"))

        closing = self.tokenizer.symbol()  # }
        SubElement(caller, SYMBOL).text = closing
        self.next()

    def compile_list_of_vars(self, caller, category, kind):
        """
        Helper that compiles a declaration tail of the form
        type varName (',' varName)* ';' and defines every name in the
        symbol table.

        :param caller: XML element handed on to compile_type
        :param category: not used by this method
        :param kind: kind passed to the symbol table for every variable
        :return: number of variables defined
        """
        var_type = self.compile_type(caller)

        # The first name is mandatory.
        self.symbols.define(self.tokenizer.identifier(), var_type, kind)
        self.next()
        declared = 1

        # Every further name is preceded by one separator token.
        while self.tokenizer.symbol() != ';':
            self.next()  # step over ','
            self.symbols.define(self.tokenizer.identifier(), var_type, kind)
            self.next()
            declared += 1

        self.next()  # step over ';'
        return declared

    def compile_classVarDec(self, caller):
        """
        Compile one class-level variable declaration.

        The current keyword is used both as the category string and as the
        member name looked up in Kind.

        :param caller: XML element handed on to compile_list_of_vars
        :return: None
        """
        modifier = self.tokenizer.key_word()
        self.next()  # step over the keyword that opened the declaration

        self.compile_list_of_vars(caller, modifier, Kind[modifier])

    def compile_type(self, caller):
        """
        Record the current type token as a child of caller and step past it.

        A keyword token is tagged KEYWORD; any other token is read as an
        identifier and tagged IDENTIFIER.

        :param caller: XML element that receives the new child
        :return: the text of the type token
        """
        if self.tokenizer.token_type() is JTok.KEYWORD:
            tag = KEYWORD
            text = self.tokenizer.key_word()
        else:
            tag = IDENTIFIER
            text = self.tokenizer.identifier()

        SubElement(caller, tag).text = text
        self.next()
        return text

    def compile_subroutine(self, caller):
        """
        Compile one subroutine declaration together with its body and emit
        the function header plus the constructor / method prologue.

        :param caller: XML element that receives the children created
            while compiling
        :return: None
        """
        routine_kind = self.tokenizer.key_word()
        self.next()

        # Return type: 'void' is recorded here, anything else goes through
        # compile_type.  The value itself is not used afterwards.
        returns_void = (self.tokenizer.token_type() is JTok.KEYWORD
                        and self.tokenizer.key_word() == "void")
        if returns_void:
            SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
            self.next()
        else:
            self.compile_type(caller)

        qualified_name = self.class_name + "." + self.tokenizer.identifier()
        self.symbols.start_subroutine()
        self.next()  # step over the subroutine name

        self.next()  # step over '('
        is_method = routine_kind == "method"
        if is_method:
            # "this" is registered as an argument before the declared
            # parameters.
            self.symbols.define("this", "", Kind.arg)
        self.compile_parameterList(SubElement(caller, "parameterList"))

        self.next()  # step over ')'

        self.next()  # step over '{'

        # The local count is needed for the function header, so all var
        # declarations are compiled before anything is written.
        local_count = 0
        while (self.tokenizer.token_type() is JTok.KEYWORD
               and self.tokenizer.key_word() == "var"):
            local_count += self.compile_var_dec(SubElement(caller, "varDec"))

        self.writer.write_function(qualified_name, local_count)

        if routine_kind == "constructor":
            # Allocate one word per field and keep the result in pointer 0.
            field_count = self.symbols.var_count(Kind.field)
            self.writer.write_push(CONSTANT, field_count)
            self.writer.write_call("Memory.alloc", 1)
            self.writer.write_pop(POINTER, 0)
        elif is_method:
            # Copy argument 0 into pointer 0.
            self.writer.write_push(ARGUMENT, 0)
            self.writer.write_pop(POINTER, 0)

        self.compile_statements(SubElement(caller, "statements"))

        self.next()  # step over '}'

    def compile_parameterList(self, caller):
        """
        Compile a possibly empty parameter list and define every parameter
        in the symbol table with Kind.arg.

        :param caller: XML element for the parameter list; it gets a single
            space as text when the list is empty
        :return: None
        """
        list_is_empty = (self.tokenizer.token_type() is JTok.SYMBOL
                         and self.tokenizer.symbol() == ")")
        if list_is_empty:
            caller.text = " "
            return

        while True:
            param_type = self.compile_type(caller)
            param_name = self.tokenizer.identifier()
            self.symbols.define(param_name, param_type, Kind.arg)
            self.next()

            more_follow = (self.tokenizer.token_type() is JTok.SYMBOL
                           and self.tokenizer.symbol() == ",")
            if not more_follow:
                break
            self.next()  # step over ','
Esempio n. 22
0
class CompilationEngine:
    """
    Compiles one Jack class into VM code.

    Tokens are pulled from a ``Tokenizer``, identifiers are tracked in a
    ``SymbolTable`` and the generated commands are handed to a ``VMWriter``.
    Every token pattern given to :meth:`eat`, :meth:`peek_token` and
    :meth:`peek_next` is a regular expression applied with ``re.match``,
    i.e. it is anchored at the start of the token only.
    """

    # Raw strings, so the backslashes reach ``re`` without relying on
    # Python's handling of unknown escape sequences.
    _OPEN_PARENTHESIS = r"\("
    _CLOSE_PARENTHESIS = r"\)"
    _OPEN_BRACKET = r"\["
    _CLOSE_BRACKET = r"\]"
    _DOT = r"\."
    _OPS = r"\+|-|\*|\/|&|\||<|>|="
    _UNARY_OPS = r"-|~"

    def __init__(self, in_address):
        """
        Create the tokenizer, symbol table and VM writer, then compile the
        class straight away.

        :param in_address: path of the source file; the ".vm" and ".xml"
            output paths are derived from it by replacing ".jack"
        """
        self.tokenizer = Tokenizer(in_address)
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(in_address.replace(".jack", ".vm"))
        self.curr_token = self.tokenizer.get_current_token()
        self.out_address = in_address.replace(".jack", ".xml")
        self.output = ""
        self.indent = 0
        # Incremented before use, so the first generated label ends in 0.
        self.label_count = -1
        self.class_name = ""
        self.compile_class()

    def write_file(self):
        """
        Ask the VM writer to write its file.

        The XML text collected in ``self.output`` is not written anywhere.
        """
        self.vm_writer.write_file()

    def write(self, to_write):
        """
        Writes to the output, with indentation.
        :param to_write: The string to write
        """
        self.output += (self.indent * " ") + to_write + "\n"

    # ========== Compilation Methods ========== #

    def compile_class(self):
        """
        Compiles a complete class.
        """
        def comp_class():
            self.eat("class")
            self.class_name = self.eat(NAME_REG)
            self.eat("{")
            self.compile_class_var_dec()
            self.compile_subroutine()
            self.eat("}")

        self.wrap("class", comp_class)

    def compile_class_var_dec(self):
        """
        Compiles every static or field declaration found at the current
        position (possibly none).
        :return: None
        """
        while self.peek_token("static|field"):
            self.wrap("classVarDec", self.__class_var_dec)

    def compile_subroutine(self):
        """
        Compiles every method, function or constructor found at the current
        position.  A class without subroutines is accepted: nothing is
        consumed when the current token does not open a subroutine.
        :return: None
        """
        sub_regex = "(constructor|function|method)"
        while self.peek_token(sub_regex):
            self.symbol_table.start_subroutine()
            kind = self.eat(sub_regex)
            self.__compile_type(True)
            # subroutine name
            name = self.__compile_name()
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_parameter_list(kind)
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            self.eat("{")
            if self.peek_token("var"):
                self.compile_var_dec()
            # The function header needs the local count, so it is written
            # only after all var declarations have been processed.
            num_locals = self.symbol_table.var_count("local")
            self.vm_writer.write_function(
                "{}.{}".format(self.class_name, name), num_locals)
            self.__set_pointer(kind)
            self.compile_statements()
            self.eat("}")

    def compile_parameter_list(self, kind):
        """
        Compiles a possibly empty parameter list, not including the
        enclosing ()
        :param kind: subroutine kind; for "method" an argument named "this"
            is defined before the declared parameters
        :return: None
        """
        if kind == "method":
            self.symbol_table.define("this", self.class_name, "argument")
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        while self.peek_token(type_reg):
            self.__params()

    def compile_var_dec(self):
        """
        Compiles a var declaration and every var declaration that directly
        follows it.
        :return: None
        """
        while True:
            self.eat("var")
            var_type = self.__compile_type(False)
            self.__var_declare(var_type, "var")
            self.eat(";")
            if not self.peek_token("var"):
                break

    def compile_statements(self):
        """
        Compiles a sequence of statements, not including the enclosing {}
        :return: None
        """
        statement_reg = "let|if|while|do|return"
        # Iterative on purpose: one stack frame per statement would tie the
        # maximum body length to the interpreter's recursion limit.
        while self.peek_token(statement_reg):
            if self.peek_token("let"):
                self.compile_let()
            elif self.peek_token("if"):
                self.compile_if()
            elif self.peek_token("while"):
                self.compile_while()
            elif self.peek_token("do"):
                self.compile_do()
            elif self.peek_token("return"):
                self.compile_return()

    def compile_do(self):
        """
        Compiles a do statement
        """
        self.eat("do")
        self.__subroutine_call()
        # Since we don't use the return value, we pop it to temp
        self.vm_writer.write_pop("temp", 0)
        self.eat(";")

    def compile_let(self):
        """
        Compiles a let statement
        """
        self.eat("let")
        name = self.__compile_name()
        is_array = False
        # Determine [expression]
        if self.peek_token(CompilationEngine._OPEN_BRACKET):
            is_array = True
            self.__handle_array(name)
        self.eat("=")
        self.compile_expression()
        # Pop the value to the spot in the memory
        if is_array:
            # The value is parked in temp 0 while the address computed by
            # __handle_array is popped into pointer 1.
            self.vm_writer.write_pop("temp", 0)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("temp", 0)
            self.vm_writer.write_pop("that", 0)
        else:
            self.__write_pop(name)
        self.eat(";")

    def compile_while(self):
        """
        Compiles a while statement.
        :return: None
        """
        self.eat("while")
        loop_label = self.__get_label("WHILE_START")
        exit_label = self.__get_label("WHILE_END")
        self.vm_writer.write_label(loop_label)
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        # Compute ~condition
        self.compile_expression()
        self.vm_writer.write_arithmetic("~")
        # if ~condition exit loop
        self.vm_writer.write_if(exit_label)
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        self.compile_statements()
        self.vm_writer.write_goto(loop_label)
        self.vm_writer.write_label(exit_label)
        self.eat("}")

    def compile_return(self):
        """
        Compiles a return statement.
        """
        self.eat("return")
        # if next is expression:
        if self.__is_term():
            self.compile_expression()
        else:
            # Void function - push 0
            self.vm_writer.write_push(CONSTANT, 0)
        self.vm_writer.write_return()
        self.eat(";")

    def compile_if(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        :return: None
        """
        self.eat("if")
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        # The condition is used as is: jump to the true branch when it
        # holds, otherwise fall through to the goto for the false label.
        if_true = self.__get_label("IF_TRUE")
        self.vm_writer.write_if(if_true)
        if_false = self.__get_label("IF_FALSE")
        self.vm_writer.write_goto(if_false)
        self.vm_writer.write_label(if_true)
        self.compile_statements()
        self.eat("}")
        # Handle else:
        if self.peek_token("else"):
            if_end = self.__get_label("IF_END")
            self.vm_writer.write_goto(if_end)
            self.vm_writer.write_label(if_false)
            self.eat("else")
            self.eat("{")
            self.compile_statements()
            self.eat("}")
            self.vm_writer.write_label(if_end)
        else:
            self.vm_writer.write_label(if_false)

    def compile_expression(self):
        """
        Compiles an expression: term (op term)*.

        Operators are emitted left to right, each one right after its
        right-hand term, so there is no operator precedence.
        :return: None
        """
        def comp_expression():
            self.compile_term()
            # Any number of "op term" pairs may follow the first term.
            while self.peek_token(CompilationEngine._OPS):
                operation = self.eat(CompilationEngine._OPS)
                self.compile_term()
                self.vm_writer.write_arithmetic(operation)

        self.wrap("expression", comp_expression)

    def compile_term(self):
        """
        Compiles a term.  Prints an error message and exits when the
        current token cannot start a term.
        :return: None
        """
        curr_type = self.peek_type()
        val = self.curr_token.get_token()
        # Handle integer constant
        if curr_type == INT_CONST:
            self.vm_writer.write_push(CONSTANT, int(val))
            self.__advance_token()
        # Handle String constant
        elif curr_type == STRING_CONST:
            self.__handle_string_constant(val)
            self.__advance_token()
        # Handle Keyword constant
        elif curr_type == KEYWORD:
            self.__handle_keyword_constant(val)
            self.__advance_token()
        # Case: token is a varName or a subroutineName
        elif curr_type == IDENTIFIER:
            self.__handle_identifier()
        # Case: ( expression )
        elif self.peek_token(CompilationEngine._OPEN_PARENTHESIS):
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        # Case: unaryOp term
        elif self.peek_token(CompilationEngine._UNARY_OPS):
            self.__handle_unary_op()
        else:
            print("Error: Incorrect Term")
            exit(-1)

    def compile_expression_list(self):
        """
        Compiles a possibly empty list of comma separated expressions
        :return: the number of expressions compiled
        """
        count = 0
        if self.__is_term():
            self.compile_expression()
            count += 1
            while self.peek_token(","):
                self.eat(",")
                self.compile_expression()
                count += 1
        return count

    # ========== Compilation Helper ========== #

    def __class_var_dec(self):
        """
        Compiles a single class var declaration.
        """
        # (static|field)
        kind = self.eat("static|field")
        # type
        var_type = self.__compile_type(False)
        # Compile varName combo until no more ","
        self.__var_declare(var_type, kind)
        self.eat(";")

    def __var_declare(self, var_type, kind):
        """
        Defines a comma separated list of names in the symbol table.
        :param var_type: type shared by all names of the declaration
        :param kind: kind handed to the symbol table for every name
        """
        while True:
            name = self.eat(NAME_REG)
            self.symbol_table.define(name, var_type, kind)
            if not self.peek_token(","):
                break
            self.eat(",")

    def __compile_type(self, for_function):
        """
        Compiles a type for a function or variable, determined by
        a received boolean value.
        :param for_function: True if is type of function, false otherwise.
        :return: the type token, or None when the token does not match
        """
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        if for_function:
            type_reg += "|void"
        return self.eat(type_reg)

    def __set_pointer(self, kind):
        """
        Emits the subroutine prologue: a method copies argument 0 into
        pointer 0, a constructor allocates the new object.
        :param kind: subroutine kind as read from the source
        """
        if kind == "method":
            self.vm_writer.write_push("argument", 0)
            self.vm_writer.write_pop("pointer", 0)
        elif kind == "constructor":
            self.__handle_constructor()

    def __handle_constructor(self):
        """
        Emits the call to Memory.alloc for a new object and stores the
        result in pointer 0.
        """
        # Allocate memory for the new object
        var_num = self.symbol_table.var_count("this")
        self.vm_writer.write_push(CONSTANT, var_num)
        self.vm_writer.write_call("Memory.alloc", 1)
        # Set the new memory spot to this
        self.vm_writer.write_pop("pointer", 0)

    def __compile_name(self):
        """
        Consumes an identifier token.  Prints an error message and exits
        when the current token is not an identifier.
        :return: the identifier text
        """
        if self.peek_type() == IDENTIFIER:
            return self.eat(NAME_REG)
        print("ERROR: Identifier Expected")
        exit(-1)

    def __params(self):
        """
        Compiles one "type name" parameter and the comma after it, if any.
        """
        var_type = self.__compile_type(False)
        name = self.eat(NAME_REG)
        self.symbol_table.define(name, var_type, "argument")
        if self.peek_token(","):
            self.eat(",")

    def __handle_unary_op(self):
        """
        Compiles "unaryOp term": the operand first, then the operation.
        """
        command = self.eat(CompilationEngine._UNARY_OPS)
        self.compile_term()
        if command == "-":
            # A leading '-' is a negation, not a subtraction.
            self.vm_writer.write_arithmetic("neg")
        else:
            self.vm_writer.write_arithmetic(command)

    def __handle_identifier(self):
        """
        Handles the case of an identifier given as a term
        """
        # Case: varName [ expression ]
        if self.peek_next(CompilationEngine._OPEN_BRACKET):
            name = self.__compile_name()
            self.__handle_array(name)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("that", 0)
        # Case: subroutineCall:
        elif self.peek_next(CompilationEngine._OPEN_PARENTHESIS) or \
                self.peek_next(CompilationEngine._DOT):
            self.__subroutine_call()
        else:
            name = self.eat(NAME_REG)
            self.__write_push(name)

    def __handle_string_constant(self, string):
        """
        Handles the case of a string constant in a term
        :param string: the constant
        """
        self.vm_writer.write_push(CONSTANT, len(string))
        self.vm_writer.write_call("String.new", 1)
        for char in string:
            self.vm_writer.write_push(CONSTANT, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

    def __handle_keyword_constant(self, word):
        """
        Handles the case of a keyword constant given in a term.
        "this" pushes pointer 0; every other keyword pushes constant 0,
        which is additionally inverted with "~" for "true".
        :param word: The keyword
        """
        if word == "this":
            self.vm_writer.write_push("pointer", 0)
        else:
            self.vm_writer.write_push(CONSTANT, 0)
            if word == "true":
                self.vm_writer.write_arithmetic("~")

    def __is_term(self):
        """
        :return: True when the current token can start a term
        """
        # Symbols first: '(' opens a parenthesised expression and the unary
        # operators '-' and '~' open "unaryOp term".
        if self.peek_token(CompilationEngine._OPEN_PARENTHESIS) or \
                self.peek_token(CompilationEngine._UNARY_OPS):
            return True
        # A stray binary operator is still routed to compile_term, which
        # reports the malformed term.
        if self.peek_token(CompilationEngine._OPS):
            return True
        return self.peek_type() in (STRING_CONST, INT_CONST, KEYWORD,
                                    IDENTIFIER)

    def __subroutine_call(self):
        """
        Compiles a subroutine call that starts at the current identifier.
        Nothing happens when the current token is not an identifier.
        """
        if self.curr_token.get_type() == IDENTIFIER:
            if self.peek_next(CompilationEngine._OPEN_PARENTHESIS):
                # name(...): call on the current object, so pointer 0 is
                # pushed as the extra first argument.
                self.vm_writer.write_push("pointer", 0)
                self.__subroutine_name(self.class_name, 1)
            elif self.peek_next(CompilationEngine._DOT):
                self.__object_subroutine_call()
            else:
                print("Error: ( or . expected")
                exit(-1)

    def __object_subroutine_call(self):
        """
        Compiles "name.subroutine(...)".  When name is known to the symbol
        table it is pushed as the extra first argument and the call is
        made on its type; otherwise name itself is used as the prefix.
        """
        name = self.eat(NAME_REG)

        n_args = 0
        # Push the object reference to the stack
        if self.symbol_table.kind_of(name):
            self.__write_push(name)
            name = self.symbol_table.type_of(name)
            n_args = 1
        self.eat(CompilationEngine._DOT)
        self.__subroutine_name(name, n_args)

    def __subroutine_name(self, type_name, n_args):
        """
        Handles the case of subroutineName(expressionList)
        :param type_name: prefix of the called function's name
        :param n_args: number of arguments already pushed by the caller
        :return: None
        """
        name = self.eat(NAME_REG)
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        nargs = self.compile_expression_list()
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.vm_writer.write_call("{}.{}".format(type_name, name),
                                  nargs + n_args)

    def __handle_array(self, name):
        """
        Compiles "[ expression ]" and emits the push of name followed by
        "+", which leaves the sum of the two on the stack.
        :param name: the array variable
        """
        self.eat(CompilationEngine._OPEN_BRACKET)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_BRACKET)
        self.__write_push(name)
        self.vm_writer.write_arithmetic("+")

    # ========== XML Handling ========== #

    def wrap(self, section_name, func):
        """
        Wraps a program structure block with the section_name, and executes
        its function
        :param section_name: The name of the section
        :param func: The function to perform
        :return: None
        """
        self.write("<{}>".format(section_name))
        self.indent += 2
        func()
        self.indent -= 2
        self.write("</{}>".format(section_name))

    # ========== Token Handling ========== #

    def eat(self, token):
        """
        Consumes the current token when it matches the given regex.

        NOTE: a mismatch is not reported; the token is left in place and
        None is returned.
        :param token: The regex of the token to compare
        :return: the consumed token text, or None on a mismatch
        """
        ctoken = self.curr_token.get_token()
        if re.match(token, ctoken):
            self.__advance_token()
            return ctoken
        return None

    def peek_token(self, compare_next):
        """
        :param compare_next: The regex to compare.
        :return: a truthy match object if the current token matches the
            regex, a falsy value otherwise.
        """
        if self.curr_token:
            return re.match(compare_next, self.curr_token.get_token())
        return False

    def peek_type(self):
        """
        :return: the type of the current token
        """
        return self.curr_token.get_type()

    def peek_next(self, comp):
        """
        :param comp: The regex to compare.
        :return: a truthy match object if the token after the current one
            matches the regex, a falsy value otherwise.
        """
        next_token = self.tokenizer.get_next_token()
        # Case: There actually is a next token
        if next_token:
            return re.match(comp, next_token.get_token())
        return False

    def __advance_token(self):
        """
        Advances the tokenizer; the cached current token is refreshed only
        while the tokenizer reports more tokens.
        """
        self.tokenizer.advance()
        if self.tokenizer.has_more_tokens():
            self.curr_token = self.tokenizer.get_current_token()

    # ========== VM Helper ========== #

    def __get_label(self, label):
        """
        :param label: label prefix
        :return: the prefix followed by a fresh running number
        """
        self.label_count += 1
        return "{}{}".format(label, str(self.label_count))

    def __write_pop(self, name):
        """
        Emits a pop into the kind and index the symbol table reports for
        the variable.
        """
        self.vm_writer.write_pop(self.symbol_table.kind_of(name),
                                 self.symbol_table.index_of(name))

    def __write_push(self, name):
        """
        Emits a push from the kind and index the symbol table reports for
        the variable.
        """
        self.vm_writer.write_push(self.symbol_table.kind_of(name),
                                  self.symbol_table.index_of(name))
Esempio n. 23
0
class CompilationEngine:
    """
    Recursive-descent compiler that turns one Jack class into VM commands.

    Tokens are pulled from a JackTokenizer (self.tkx), identifiers are
    tracked in a class-level and a subroutine-level symbol table, and the
    generated commands are emitted through a VMWriter.

    The methods rely on a strict token-position protocol; each docstring
    states where the tokenizer is expected to stand on entry and where it is
    left on exit.  The code assumes that tkx.advance() returns the token it
    has just moved to and that tkx.get_next_token() peeks without consuming
    (NOTE(review): confirm against JackTokenizer).
    """

    def __init__(self, input_path, output_path):
        """
        Create the helpers and immediately compile the whole class.

        :param input_path: handed to JackTokenizer
        :param output_path: handed to VMWriter (and on to compile_class)
        """
        self.class_name = ''
        self.subroutine_name = ''
        # Label counters start at -1 so the first label of a subroutine
        # gets the suffix 0 after the pre-increment.
        self.if_counter = -1
        self.while_counter = -1
        self.subroutine_num_arg = 0
        self.tkx = JackTokenizer(input_path)
        self.class_table = symbolTable()
        self.subroutine_table = symbolTable()
        self.vm_writer = VMWriter(output_path)
        self.compile_class(output_path)

    def compile_class(self, output_path):
        """
        Compile a complete class:
        'class' className '{' classVarDec* subroutineDec* '}'

        :param output_path: unused; kept so existing callers keep working
        """
        # 'class'
        self.tkx.advance()

        # className
        self.subroutine_table.class_name = self.tkx.advance()
        self.class_name = self.tkx.current_token()

        # '{'
        self.tkx.advance()

        # First token of the class body.
        self.tkx.advance()
        while self.tkx.current_token() in ('static', 'field'):
            self.compile_class_var_dec()
            # Step past the ';' that ended the declaration.
            self.tkx.advance()

        while self.tkx.current_token() in ('constructor', 'function',
                                           'method'):
            self.compile_subroutine_dec()
            # Step past the '}' that ended the subroutine body.
            self.tkx.advance()

    def compile_subroutine_dec(self):
        """
        Compile a complete constructor, function or method.

        Entry: current token is 'constructor', 'function' or 'method'.
        Exit:  current token is the '}' that closes the subroutine body.
        """
        # Labels are numbered per subroutine.
        self.if_counter = -1
        self.while_counter = -1

        # constructor / function / method
        subroutine = self.tkx.current_token()

        # 'void' or return type
        self.tkx.advance()

        self.subroutine_table.start_subroutine()

        was_constructor = subroutine == 'constructor'
        was_method = subroutine == 'method'
        if was_method:
            # A method receives its object as a hidden first argument.
            self.subroutine_table.define(THIS, self.class_name, 'argument')
            self.subroutine_num_arg = 1
        self.subroutine_name = self.class_name + '.'

        # subroutine name
        self.tkx.advance()
        self.subroutine_name += self.tkx.current_token()

        # '('
        self.tkx.advance()

        self.compile_parameter_list()

        self.compile_subroutine_body(was_constructor, was_method)

    def compile_parameter_list(self):
        """
        Compile a (possibly empty) parameter list and register every
        parameter as an 'argument' in the subroutine symbol table.

        Entry: current token is '('.
        Exit:  current token is ')'.
        """
        if self.tkx.advance() == ')':
            # Empty parameter list.
            return

        # First parameter: type varName
        param_type = self.tkx.current_token()
        name = self.tkx.advance()
        self.subroutine_table.define(name, param_type, 'argument')

        # ',' or ')'
        self.tkx.advance()
        while self.tkx.current_token() != ')':
            # Current token is ','; the next two are type and varName.
            param_type = self.tkx.advance()
            name = self.tkx.advance()
            self.subroutine_table.define(name, param_type, 'argument')
            self.tkx.advance()

    def compile_subroutine_body(self, was_constructor, was_method):
        """
        Compile a subroutine body: '{' varDec* statements '}'

        The VM `function` command is written only after all local
        declarations were read, because it needs the number of locals.

        Entry: current token is the ')' of the parameter list.
        Exit:  current token is the closing '}' of the body.

        :param was_constructor: True when compiling a constructor; memory
        for the fields is allocated and its address stored in pointer 0
        :param was_method: True when compiling a method; argument 0 is
        copied into pointer 0
        """
        # '{'
        self.tkx.advance()

        # var declarations
        while self.tkx.get_next_token() == 'var':
            self.compile_var_dec()

        self.vm_writer.write_function(self.subroutine_name,
                                      self.subroutine_table.var_count(LOCAL))
        if was_constructor:
            self.vm_writer.write_push(CONSTANT,
                                      self.class_table.var_count(FIELD))
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop(POINTER, 0)
        elif was_method:
            self.vm_writer.write_push(ARGUMENT, 0)
            self.vm_writer.write_pop(POINTER, 0)
        self.compile_statements()

    def compile_class_var_dec(self):
        """
        Compile a static or field declaration:
        ('static' | 'field') type varName (',' varName)* ';'

        Entry: current token is 'static' or 'field'.
        Exit:  current token is ';'.
        """
        self.subroutine_num_arg = 0
        # static or field
        kind = self.tkx.current_token()

        # type
        var_type = self.tkx.advance()

        # var name
        name = self.tkx.advance()

        self.class_table.define(name, var_type, kind)

        # ',' or ';'
        self.tkx.advance()
        while self.tkx.current_token() != ';':
            # Current token is ','; the next one is another var name.
            name = self.tkx.advance()
            self.class_table.define(name, var_type, kind)
            self.tkx.advance()

    def compile_var_dec(self):
        """
        Compile a local variable declaration:
        'var' type varName (',' varName)* ';'

        Entry: the *next* token is 'var'.
        Exit:  current token is ';'.
        """
        # 'var'
        self.tkx.advance()

        # type
        var_type = self.tkx.advance()

        # var name
        name = self.tkx.advance()

        self.subroutine_table.define(name, var_type, LOCAL)

        # ',' or ';'
        self.tkx.advance()
        while self.tkx.current_token() != ';':
            # Current token is ','; the next one is another var name.
            name = self.tkx.advance()
            self.subroutine_table.define(name, var_type, LOCAL)
            self.tkx.advance()

    def compile_statements(self):
        """
        Compile a sequence of statements up to the closing '}'.

        Entry: current token is the token just before the first statement.
        Exit:  current token is the '}' that ends the statement sequence.
        """
        self.tkx.advance()

        while self.tkx.current_token() != '}':
            self.compile_statement()
            # Every statement compiler stops on its own last token.
            self.tkx.advance()

    def compile_statement(self):
        """
        Dispatch on the current keyword to the matching statement compiler.
        Any other token is silently ignored.
        """
        handlers = {
            'let': self.compile_let,
            'if': self.compile_if,
            'do': self.compile_do,
            'while': self.compile_while,
            'return': self.compile_return,
        }
        handler = handlers.get(self.tkx.current_token())
        if handler is not None:
            handler()

    def compile_let(self):
        """
        Compile a let statement:
        'let' varName ('[' expression ']')? '=' expression ';'

        Entry: current token is 'let'.
        Exit:  current token is ';'.
        """
        was_array = False
        name = self.tkx.advance()  # identifier
        if self.tkx.advance() == '[':
            # Target address = index expression + array base.
            self.compile_expression()
            kind = self.get_kind(name)
            index = self.get_index(name)

            self.vm_writer.write_push(kind, index)
            self.vm_writer.write_arithmetic('add')

            was_array = True
            # Current token is ']'; move on to '='.
            self.tkx.advance()

        # compile_expression steps over the '=' itself.
        self.compile_expression()
        if not was_array:
            kind = self.get_kind(name)
            index = self.get_index(name)

            self.vm_writer.write_pop(kind, index)
        else:
            # Park the value in temp 0 so that pointer 1 can be aimed at
            # the target address computed before the right-hand side.
            self.vm_writer.write_pop(TEMP, 0)
            self.vm_writer.write_pop(POINTER, 1)
            self.vm_writer.write_push(TEMP, 0)
            self.vm_writer.write_pop(THAT, 0)

    def compile_if(self):
        """
        Compile an if statement:
        'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?

        Entry: current token is 'if'.
        Exit:  current token is the last '}' of the statement.
        """
        self.if_counter += 1
        # Keep a local copy: nested ifs bump the shared counter.
        label_if = self.if_counter
        self.tkx.advance()  # '(' symbol
        self.compile_expression()
        self.vm_writer.write_if_goto(IF_TRUE + str(label_if))
        self.vm_writer.write_goto(IF_FALSE + str(label_if))
        self.vm_writer.write_label(IF_TRUE + str(label_if))
        # Current token is ')'.
        self.tkx.advance()  # '{' symbol
        self.compile_statements()

        # Current token is '}'.
        if self.tkx.get_next_token() == 'else':
            self.vm_writer.write_goto(IF_END + str(label_if))
            self.vm_writer.write_label(IF_FALSE + str(label_if))

            self.tkx.advance()  # else
            self.tkx.advance()  # '{' symbol
            self.compile_statements()

            # Current token is the '}' of the else branch.
            self.vm_writer.write_label(IF_END + str(label_if))
        else:
            self.vm_writer.write_label(IF_FALSE + str(label_if))

    def compile_do(self):
        """
        Compile a do statement: 'do' subroutineCall ';'

        Entry: current token is 'do'.
        Exit:  current token is ';'.
        """
        # First identifier of the call (subroutine, class or variable name).
        name = self.tkx.advance()

        # '(' or '.'
        self.tkx.advance()

        self.compile_subroutine_call(name, True)

        # ';'
        self.tkx.advance()

    def compile_while(self):
        """
        Compile a while statement:
        'while' '(' expression ')' '{' statements '}'

        Entry: current token is 'while'.
        Exit:  current token is the closing '}'.
        """
        self.while_counter += 1
        # Keep a local copy: nested loops bump the shared counter.
        while_label = self.while_counter
        self.vm_writer.write_label(WHILE_LABEL + str(while_label))

        self.tkx.advance()  # '(' symbol
        self.compile_expression()
        # Leave the loop when the condition is false.
        self.vm_writer.write_arithmetic('not')
        self.vm_writer.write_if_goto(END_WHILE + str(while_label))
        # Current token is ')'.
        self.tkx.advance()  # '{' symbol
        self.compile_statements()
        # Current token is '}'.
        self.vm_writer.write_goto(WHILE_LABEL + str(while_label))
        self.vm_writer.write_label(END_WHILE + str(while_label))

    def compile_return(self):
        """
        Compile a return statement: 'return' expression? ';'

        A bare `return;` pushes constant 0 so that something is always
        returned.

        Entry: current token is 'return'.
        Exit:  current token is ';'.
        """
        if self.tkx.get_next_token() != ';':
            self.compile_expression()
        else:
            self.vm_writer.write_push(CONSTANT, 0)
            # ';'
            self.tkx.advance()
        self.vm_writer.write_return()

    def compile_expression(self):
        """
        Compile an expression: term (op term)*

        Operators are emitted after both operands, strictly left to right.

        Entry: current token is the token just before the expression.
        Exit:  current token is the first token after the expression.
        """
        self.tkx.advance()
        self.compile_term()

        while self.tkx.current_token() in OP:
            op = self.tkx.current_token()
            self.tkx.advance()
            self.compile_term()
            self.vm_writer.write_arithmetic(OP_TRANSLATOR[op])

    def compile_expression_list(self):
        """
        Compile a (possibly empty) comma-separated list of expressions.

        The arguments are counted in a local variable.  The shared
        attribute self.subroutine_num_arg must not be used as the running
        counter, because a nested subroutine call inside one of the
        argument expressions re-enters this method and resets it, which
        made calls such as `foo(1, bar(2))` report too few arguments.

        Entry: current token is '('.
        Exit:  current token is ')'.

        :return: number of expressions compiled; the same value is also
        stored in self.subroutine_num_arg
        """
        self.subroutine_num_arg = 0
        if self.tkx.get_next_token() == ')':
            # Empty list: just step onto the ')'.
            self.tkx.advance()
            return 0

        self.compile_expression()
        count = 1

        while self.tkx.current_token() != ')':
            # Current token is ','; compile_expression steps over it.
            self.compile_expression()
            count += 1

        self.subroutine_num_arg = count
        return count

    def compile_term(self):
        """
        Compile a single term.

        The kind of term is decided from the type of the current token and,
        for identifiers, from whether '[', '(' or '.' follows.

        Entry: current token is the first token of the term.
        Exit:  current token is the first token after the term.
        """
        token_type = self.tkx.token_type()
        if token_type == TERM_INT_CONST:
            self.vm_writer.write_push(CONSTANT, str(self.tkx.current_token()))
            self.tkx.advance()
        elif token_type == TERM_STRING_CONST:
            self.write_string_const(self.tkx.current_token())
            self.tkx.advance()
        elif token_type == TERM_KEYWORD:
            if self.tkx.current_token() in {'true', 'false', 'null'}:
                # false and null are 0; true is not(0).
                self.vm_writer.write_push(CONSTANT, 0)
                if self.tkx.current_token() == 'true':
                    self.vm_writer.write_arithmetic('not')
            else:  # this
                self.vm_writer.write_push(POINTER, 0)
            self.tkx.advance()
        elif token_type == TERM_SYMBOL:
            if self.tkx.current_token() == '(':
                # Parenthesised expression; afterwards skip the ')'.
                self.compile_expression()
                self.tkx.advance()
            else:
                # Unary operator applied to the following term.
                unary_op = self.tkx.current_token()
                self.tkx.advance()
                self.compile_term()
                self.vm_writer.write_arithmetic(UNARY_OP_TRANSLATOR[unary_op])
        elif token_type == TERM_IDENTIFIER:
            name = self.tkx.current_token()
            kind = self.get_kind(name)
            index = self.get_index(name)
            self.tkx.advance()
            if self.tkx.current_token() == '[':
                # Array read: address = index expression + base.
                self.compile_expression()
                self.vm_writer.write_push(kind, index)
                self.vm_writer.write_arithmetic('add')
                self.vm_writer.write_pop(POINTER, 1)
                self.vm_writer.write_push(THAT, 0)
                # Skip the ']'.
                self.tkx.advance()
            elif self.tkx.current_token() in ('(', '.'):
                self.compile_subroutine_call(name, False)
                # Skip the ')' that closes the argument list.
                self.tkx.advance()
            else:
                # Plain variable.
                self.vm_writer.write_push(kind, index)

    def compile_subroutine_call(self, name, isDo):
        """
        Compile a subroutine call whose first identifier was already read.

        Three shapes are handled:
        * var.method(...)   - `name` is a known variable: the object is
          pushed first and the call goes to <type of var>.method
        * Class.function(...) - `name` is not a known variable
        * method(...)       - call on the current object (pointer 0)

        Entry: current token is the '.' or '(' that follows `name`.
        Exit:  current token is the ')' that closes the argument list.

        :param name: the identifier that precedes '.' or '('
        :param isDo: True for a `do` statement; the returned value is then
        discarded into temp 0
        """
        was_method = False
        if self.tkx.current_token() == '.':

            kind = self.get_kind(name)
            if kind:
                # Method call on a variable: push the object as argument 0.
                index = self.get_index(name)
                self.vm_writer.write_push(kind, index)
                was_method = True
                name = self.get_type(name) + '.' + self.tkx.advance()
            else:
                # Function / constructor call on a class name.
                name += '.' + self.tkx.advance()

            # '('
            self.tkx.advance()

        elif '.' not in name:
            # Unqualified call: a method of the current object.
            name = self.class_name + '.' + name
            was_method = True
            self.vm_writer.write_push(POINTER, 0)

        # Use the returned count; the shared attribute may have been reset
        # by nested calls compiled inside the argument expressions.
        arg_count = self.compile_expression_list()
        if was_method:
            arg_count += 1

        self.vm_writer.write_call(name, arg_count)
        self.subroutine_num_arg = 0
        if isDo:
            self.vm_writer.write_pop(TEMP, 0)

    def get_kind(self, name):
        """
        Look up the kind of `name`, subroutine scope first, then class scope.

        :return: the kind reported by the first table that knows the name
        (the class table's answer when the subroutine table returns None)
        """
        kind = self.subroutine_table.kind_of(name)
        if kind is not None:
            return kind
        return self.class_table.kind_of(name)

    def get_type(self, name):
        """
        Look up the type of `name`, subroutine scope first, then class scope.

        :return: the type reported by the first table that knows the name
        (the class table's answer when the subroutine table returns None)
        """
        var_type = self.subroutine_table.type_of(name)
        if var_type is not None:
            return var_type
        return self.class_table.type_of(name)

    def get_index(self, name):
        """
        Look up the index of `name`, subroutine scope first, then class
        scope.

        :return: the index reported by the first table that knows the name
        (the class table's answer when the subroutine table returns None)
        """
        index = self.subroutine_table.index_of(name)
        if index is not None:
            return index
        return self.class_table.index_of(name)

    def write_string_const(self, str):
        """
        Emit the VM code that builds a String object for a string constant.

        :param str: text of the string constant; every '&quot' marker is
        removed before the characters are emitted (the parameter name
        shadows the builtin but is kept for compatibility with callers)
        """
        re_str = str.replace('&quot', '')
        self.vm_writer.write_push(CONSTANT, len(re_str))
        self.vm_writer.write_call('String.new', 1)
        for char in re_str:
            self.vm_writer.write_push(CONSTANT, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)