Example #1
0
 def makeTable(self, _prev):
     """Return a new ``ST`` symbol table whose ``parent`` is ``_prev``."""
     child_table = ST()
     # link the fresh table to the one it is nested in
     child_table.parent = _prev
     return child_table
def main():
    """Translate the assembly file named by ``sys.argv[1]`` into a ``.hack`` file.

    The output file has the same path as the input with its final extension
    replaced by ``.hack``.  Every command line returned by the parser is
    classified and one line of output is written per command.
    """
    import os

    source_path = sys.argv[1]
    # splitext only strips the final extension, so "./prog.asm" becomes
    # "./prog" (splitting on the first '.' would yield an empty name).
    filename = os.path.splitext(source_path)[0]

    # NOTE(review): append mode is kept from the original; re-running on the
    # same input therefore grows the existing .hack file - confirm intended.
    with open(filename + ".hack", "a") as outputfile:
        # input file is the file where input will come from
        inputfile = Parser(source_path)
        lines = inputfile.commandLines()

        for line in lines:
            # Parse each line once and reuse the result for every query.
            command = ParserComd(line)
            command_type = command.commandType()

            if command_type == 'A_command':
                symbol_line = command.symbol()
                # NOTE(review): a new SymbolTable is created for every
                # A command, exactly as before - confirm this is intended.
                symbol_a = SymbolTable()
                symbol_a.addEntry(symbol_line)
                outputfile.write(symbol_a.GetAddress(symbol_line))
                outputfile.write('\n')

            elif command_type in ('C_command_a', 'C_command_b'):
                dest_line = command.dest()
                comp_line = command.comp()
                jump_line = command.jump()
                cbinary = Code(dest_line, comp_line, jump_line).cinstruction()
                outputfile.write(cbinary)
                outputfile.write('\n')

            elif command_type == 'L_command':
                outputfile.write('This line is going to delete\n')
Example #3
0
class symboltabletests(unittest.TestCase):
    """Unit tests for ``SymbolTable``."""

    def setUp(self):
        # unittest calls this before each test method
        self.st = SymbolTable()

    def testContains(self):
        """An added entry is reported by ``contains``; an unknown name is not."""
        table = self.st
        table.addEntry("loop", 100)
        self.assertTrue(table.contains("loop"))
        self.assertFalse(table.contains("bobby"))
Example #4
0
def first_pass(path):
    """Walk the commands parsed from ``path`` and collect label positions.

    Every command of type ``CommandType.L`` is stored in a new SymbolTable
    under its symbol, mapped to the number of non-L commands seen before it.
    Returns the populated table.
    """
    parser = Parser(path)
    labels = SymbolTable()
    instruction_count = 0

    while parser.hasMoreCommands():
        if parser.commandType() != CommandType.L:
            # only non-label commands advance the counter
            instruction_count += 1
        else:
            labels.add_entry(parser.symbol(), instruction_count)
        parser.advance()

    return labels
Example #5
0
 def visit_Fundef(self, node):
     """Register a function definition and visit its arguments and body.

     The function is recorded in the enclosing ``node.Functions`` table, then
     fresh function/variable tables (children of the node's tables) are handed
     to the argument list and the compound instruction before visiting them.
     An error is appended to ``self.errors`` when ``Variables.put`` returns -1
     for an argument.
     """
     node.Functions.putNewFun(node.id, node.type)
     Functions = FunctionsTable(node.Functions, "Functions")
     Variables = SymbolTable(node.Variables, "Variables")
     node.argList.Functions = Functions
     node.argList.Variables = Variables
     listOfArguments = node.argList.accept(self)
     for element in listOfArguments:
         if element is not None:
             # element is indexed as a pair: [0] is stored as the variable
             # key and [1] as its value (presumably name and type)
             node.Functions.put(node.id, element[1])
             if Variables.put(element[0], element[1]) == -1:
                 # Use element[0] for the name: the pair is accessed by index
                 # everywhere else, so ``element.name`` would not resolve.
                 self.errors.append("In line "+ str(node.lineno) + ": variable "+ str(element[0]) + " was initialized")
     node.compoundInstr.Functions = Functions
     node.compoundInstr.Variables = Variables
     node.compoundInstr.accept(self)
    def setUp(self):
        """Create an Assembler and give it a new SymbolTable and Parser."""
        assembler = Assembler()
        source_parser = Parser()
        table = SymbolTable()

        self.assembler = assembler
        self.symbolTable = table

        assembler.setSymbolTable(table)
        assembler.setParser(source_parser)
Example #7
0
 def __init__(self):
     """Create the root symbol table, the stack of active tables and the function table."""
     root_table = ST()
     self.root = root_table
     # the stack of active symbol tables starts with only the root
     self.activeSTs = [root_table]
     # table of functions, initially empty
     self.ftable = dict()
Example #8
0
 def visit_FunDef(self,node):
     """Visit a function definition inside its own scope and return its name.

     A new SymbolTable named after the function becomes the current scope,
     a FunSymbol is stored in the parent scope, the body is visited and the
     previous scope is restored.
     """
     # print "visiting FunDef"
     self.symbolTable = SymbolTable(self.symbolTable,node.id.value)
     # Visit the arguments eagerly: on Python 3 ``map`` is lazy, so the
     # arguments would otherwise not be visited before the body.
     arg_symbols = [arg.accept(self) for arg in node.argList.list]
     self.symbolTable.getParentScope().put(node.id.value,FunSymbol(node.typeOrId, node.id.value, arg_symbols))
     node.compoundInstr.accept(self)
     self.symbolTable = self.symbolTable.getParentScope()
     return node.id.value
 def __init__(self, inputFile, outputFile):
     """Create the tokenizer, VM writer and symbol table, then compile the class.

     Compilation is started from the constructor via ``CompileClass``.
     """
     self.tokenizer = JackTokenizer(inputFile)
     self.vmWriter = VMWriter(outputFile)
     self.symbolTable = SymbolTable()
     self.classname = ""
     # Counters are set up before compilation starts so that they already
     # exist if CompileClass reads them (presumably for while/if labels).
     self.whilecounter = 0
     self.ifcounter = 0
     self.CompileClass()
Example #10
0
 def visit_ClassDef(self,node):
     """Visit a class definition and register it in the outermost scope.

     The class gets its own SymbolTable whose parent is the parent class's
     table when ``node.parentId`` is set, otherwise the current scope.  After
     the class content is visited the current scope is rewound to the root
     table, where the ClassSymbol is stored.
     """
     # print "visiting ClassDef"
     parent_id = node.parentId
     if parent_id is None:
         enclosing = self.symbolTable
     else:
         enclosing = self.classTables[parent_id.value]
     self.symbolTable = SymbolTable(enclosing, node.id.value)

     parent_symbol = None if parent_id is None else parent_id.accept(self)
     classSymbol = ClassSymbol(node.accessmodificator, node.id.value, parent_symbol, node.classcontent.accept(self))
     self.classTables[node.id.value] = self.symbolTable

     # climb to the table that has no parent
     while self.symbolTable.parent is not None:
         self.symbolTable = self.symbolTable.getParentScope()
     self.symbolTable.put(node.id.value, classSymbol)
Example #11
0
 def visit_Program(self,node):
     """Visit declarations, function definitions and instructions of a program.

     A new top-level SymbolTable named 'main' is installed first.  Any
     exception raised while visiting is reported through ``self.error``.
     """
     try:
         #print "visiting Program"
         self.symbolTable=SymbolTable(None,'main')
         node.declarations.accept(self)
         node.fundefs.accept(self)
         node.instructions.accept(self)
     except Exception:
         # ``Exception`` rather than a bare except, so KeyboardInterrupt and
         # SystemExit still propagate.
         self.error("could not continue parsing, correct errors first",0)
 def __init__(self, tokenizer, outputFile, vmFile):
     """Store the tokenizer and output file and create the helper objects.

     A SymbolTable and a VMWriter (built from ``vmFile``) are created and the
     label counter starts at 0.
     """
     # Imported when the constructor runs rather than at module import time.
     from SymbolTable import SymbolTable
     from VMWriter import VMWriter
     self.tokenizer = tokenizer
     self.outputFile = outputFile
     self.symbolTable = SymbolTable()
     self.vmWriter = VMWriter(vmFile)
     self.labelNum = 0
     # NOTE(review): looks like leftover debug output - confirm it is wanted.
     print(outputFile)
Example #13
0
 def __init__(self,tokens,vmwriter):
     """Store the token list and VM writer and create a SymbolTable.

     Exits the program via ``sys.exit`` when ``tokens[0]`` cannot be read or
     lacks a ``value`` or ``type`` attribute.
     """
     try:
         # Probe the first token; only the attribute access matters.
         tokens[0].value
         tokens[0].type
     except Exception:
         # ``Exception`` rather than a bare except, so KeyboardInterrupt and
         # SystemExit are not converted into this message.
         sys.exit("Parser did not take in a list of tokens!")
     self.tokens=tokens
     self.vmwriter=vmwriter
     self.symTable=SymbolTable()
Example #14
0
    def visit_CompoundInstruction(self, node):
        """Visit the declarations and then the instructions of a compound block.

        The block is visited inside a new "compound" scope; afterwards the
        current scope is set back to that scope's parent.
        """
        block_scope = SymbolTable(self.scope, "compound")
        self.scope = block_scope

        for decl in node.decls:
            decl.accept(self)

        for instr in node.instrs:
            instr.accept(self)

        # leave the block scope again
        self.scope = self.scope.parent
Example #15
0
 def __init__(self, tokenizer, out_file_name):
     """
     Keep the tokenizer, create a VMWriter for ``out_file_name`` and a
     SymbolTable, and reset the class name, subroutine name and counter.
     """
     self._tokenizer = tokenizer
     self._vm_writer = VMWriter(out_file_name)
     self._symbol_table = SymbolTable()
     # nothing is being compiled yet
     self._class_name = self._subroutine_name = None
     self._counter = 0
 def __init__(self, input_file, output_file):
     """Create the tokenizer, symbol table and VM writer and reset all state.

     ``dic_arithmetic`` maps each binary operator symbol to the text stored
     for it (a VM command name or a ``call Math...`` string).
     """
     self.jack_tokenizer = JackTokenizer(input_file)
     self.symbol_table = SymbolTable()
     self.writer = VMWriter(output_file)

     # names start out empty
     self.class_name = self.subroutine_name = self.return_type = ""
     # counters start at zero
     self.label_counter_if = self.label_counter_while = 0
     self.num_args_called_function = 0
     self.is_unary = False

     self.dic_arithmetic = {
         "+": "add",
         "-": "sub",
         "*": "call Math.multiply 2",
         "/": "call Math.divide 2",
         "&": "and",
         "|": "or",
         "<": "lt",
         ">": "gt",
         "=": "eq",
     }
Example #17
0
    def __init__(self, input_file_path):
        """Open the output file and create the parser and symbol table.

        The output path is ``input_file_path`` with its final extension
        replaced by ``.hack``; the file is opened for writing (truncating any
        existing content).
        """
        import os

        # Replace only the final extension.  A plain str.replace('.asm', ...)
        # returns the input path unchanged when it contains no '.asm', and
        # opening that path with 'w' would wipe the source file.
        output_path = os.path.splitext(input_file_path)[0] + '.hack'
        self.output_file = open(output_path, 'w')

        # The ROM address is the address of the current instruction written
        # in the .hack file. The first instruction is 0, second is 1, etc.
        # A label is not an instruction.
        self.current_rom_address = ROM_BASE_ADRESS

        self.parser = Parser(input_file_path)
        self.symbol_table = SymbolTable()

        # The RAM address of the next free memory that a new variable
        # should be at.
        self.next_free_var_address = VARIABLES_BASE_ADDRESS
    def compileClass(self):
        """Compile one class: 'class' className '{' classVarDec* subroutineDec* '}'."""
        self.classTable = SymbolTable()

        self.getNext()  # class
        self.className = self.tokenizer.getToken()
        self.getNext()  # className
        self.getNext()  # {

        # classVarDec*
        while self.tokenizer.getToken() in ("static", "field"):
            self.compileDec()

        # subroutineDec*
        while self.tokenizer.getToken() in ("constructor", "function", "method"):
            self.compileSubroutine()

        self.getNext()  # }
Example #19
0
    def visit_Fundef(self, node):
        """Register a function in the current scope and visit it in its own scope.

        Prints a message when the name is already defined directly in the
        current scope, and another when no return statement set
        ``self.returned`` while the body was visited.
        """
        earlier = self.scope.getDirect(node.name)
        if earlier is not None:
            print("Symbol {0} already defined at {1}:{2}. First defined at {3}:{4}".format(
                node.name, node.pos[0], node.pos[1], earlier.pos[0], earlier.pos[1]))
        else:
            self.scope.put(node.name, node)

        # Enter the function's scope and remember what it must return
        self.scope = SymbolTable(self.scope, node.name)
        self.return_type = node.return_type
        self.returned = False

        for arg in node.arguments:
            arg.accept(self)
        node.body.accept(self)

        missing_return = not self.returned
        if missing_return:
            print("No return statement found in function {0} defined at {1}:{2}".format(node.name, node.pos[0], node.pos[1]))

        # Forget the return bookkeeping and step back out of the function scope
        self.return_type = None
        self.returned = False
        self.scope = self.scope.parent
Example #20
0
def main():
    """Assemble the .asm file given on the command line into a .hack file.

    The first pass records every L command's symbol with the index of the
    instruction that follows it.  The second pass translates A and C commands
    to 16-character binary strings, allocating symbol addresses from 16
    upwards for symbols not yet in the table.  The result is written next to
    the input with a ``.hack`` extension.  Errors are printed and abort the
    run without writing any output.
    """
    #make sure they used the program right
    if len(sys.argv) != 2:
        print("usage: python assempler.py <some .asm file>")
        return

    #get the path to the asm file
    path = sys.argv[1]

    #make sure it is an asm file; splitext copes with paths that have no
    #dot at all or dots in directory names
    if os.path.splitext(path)[1] != '.asm':
        print("Error: you did not supply an asm file")
        return

    #holds the binary output lines
    output = []

    # Keep the source open for both passes and close it on every exit path.
    with open(path, 'r') as file:
        #create modules and keep track of current ram address for symbols
        parser = Parser(file)
        code = Code()
        symbolTable = SymbolTable()
        symbolEntry = 16

        #first pass to add L command labels to symbol table
        rom = 0
        while parser.hasMoreCommands():
            if parser.commandType() == "L_COMMAND":
                print(parser.symbol())
                symbolTable.addEntry(parser.symbol(), rom)
            else:
                rom += 1

            parser.advance()

        #reset the parser for 2nd pass
        parser.reset()

        #2nd pass
        while parser.hasMoreCommands():
            #get command type and create a command to output
            iType = parser.commandType()
            command = None
            print(parser.currentInstruction)
            if iType == 'C_COMMAND':
                #get all parts of c command in binary
                dest = code.dest(parser.dest())
                comp = code.comp(parser.comp())
                jump = code.jump(parser.jump())

                #error check
                if dest is None or comp is None or jump is None:
                    print("Error: invalid dest, comp, or jump")
                    return
                command = '111' + comp + dest + jump
            elif iType == 'A_COMMAND':
                #get symbol and error check
                symbol = parser.symbol()
                if symbol is None:
                    print("Error: invalid symbol declaration")
                    return

                #just convert to binary if integer
                if isInt(symbol):
                    command = decimalToBinary(symbol)
                else:
                    #if the symbol isnt in the symbol table add it
                    if not symbolTable.contains(symbol):
                        symbolTable.addEntry(symbol, symbolEntry)
                        symbolEntry += 1
                    #convert address from symbol table to binary
                    command = decimalToBinary(symbolTable.getAddress(symbol))
            #since l commands are already handled, dont do anything
            elif iType == 'L_COMMAND':
                parser.advance()
                continue

            #error check command and add to output
            if command is None:
                print("Error: binary string longer than 16bits")
                return
            output.append(command)

            #next line
            parser.advance()

    #write to file but change to .hack
    outputPath = os.path.splitext(path)[0] + '.hack'
    with open(outputPath, 'w') as outfile:
        outfile.writelines(binary + '\n' for binary in output)
Example #21
0
 def __init__(self):
     """Create the root symbol table and the three-level ``ttypes`` table.

     ``ttypes`` is a nested defaultdict: any missing key at the first two
     levels yields another defaultdict, and at the third level yields None.
     ``fill_ttypes`` is then called to populate it.
     """
     def _missing_entry():
         return None

     def _innermost_level():
         return defaultdict(_missing_entry)

     def _middle_level():
         return defaultdict(_innermost_level)

     self.symbol_table = SymbolTable(None, "TypeChecker", {})
     self.ttypes = defaultdict(_middle_level)
     self.fill_ttypes()
from SymbolTable import SymbolTable

# Demo script: create a SymbolTable, insert a few entries, printing the
# table along the way, then look one of the entries up.
SymbolT = SymbolTable()
# print the table before anything has been inserted
SymbolT.print()
SymbolT.insert(id="var", tipo="int", attributes=None)
SymbolT.print()
SymbolT.insert(id="a", tipo="double", attributes=None)
SymbolT.insert(id="function", tipo="void", attributes=None)
SymbolT.print()
# show what lookup returns for an id inserted above
print(SymbolT.lookup(id="a"))
Example #23
0
class TypeChecker(NodeVisitor):
    """AST visitor that checks types and scoping and prints every problem found.

    The visitor reads a module-level ``ttype`` table (indexed as
    ``ttype[op][left_type][right_type]``) and expects ``self.symbol_table``
    to be set before visiting starts; neither is created in this class.
    """

    # Class-level defaults so that checks performed before any loop, function
    # or declaration has been visited read a value instead of raising
    # AttributeError.
    is_in_loop = False
    current_function = None
    current_type = ""

    def visit_BinExpr(self, node):
        """Return the result type of a binary expression, printing when there is none."""
        if isinstance(node.left, str):
            type1 = node.left
        else:
            type1 = self.visit(node.left)     # type1 = node.left.accept(self)
        if isinstance(node.right, str):
            type2 = node.right
        else:
            type2 = self.visit(node.right)    # type2 = node.right.accept(self)
        op = node.op
        result_type = ttype[op][type1][type2]
        if result_type is None:
            print("Wrong expression " + op + " in line: " + self.my_str(node.line))
        return result_type

    def visit_UnaryExpr(self, node):
        """Return the type of the wrapped expression."""
        return self.visit(node.expr)

    def visit_PrintInstruction(self, node):
        """Visit the expressions being printed."""
        self.visit(node.expr_list)

    def visit_FunctionExpression(self, node):
        """Check a call against the function's declared arguments.

        Returns the function's type, or None when the name does not resolve
        to a FunctionSymbol.
        """
        function_definition = self.symbol_table.getGlobal(node.name)
        if function_definition is None or not isinstance(function_definition, FunctionSymbol):
            print("function " + node.name + " is not defined" + " in line: " + self.my_str(node.line))
            return None

        if node.expr is not None:
            types = [self.visit(child) for child in node.expr.children]
            declared_types = function_definition.args
            if len(types) != len(declared_types):
                print("Wrong arguments in function " + node.name + " in line: " + self.my_str(node.line))
            else:
                for given_type, declared_type in zip(types, declared_types):
                    if given_type != declared_type:
                        print("Mismatching argument types Expected " + self.my_str(declared_type) + ", got " + self.my_str(given_type) + "in line: " + self.my_str(node.line))
        elif function_definition.args != []:
            print("Worng number of arguments in function: " + node.name + "in line: " + self.my_str(node.line))
        return function_definition.type

    def visit_Variable(self, node):
        """Return the declared type of a variable, or None (with a message) if undefined."""
        dec = self.symbol_table.getGlobal(node.name)
        if dec is None:
            print("Undefined symbol: " + node.name + "in line: " + self.my_str(node.line))
            return None
        return dec.type

    def visit_WhileInstr(self, node):
        """Visit a while loop's condition and body with the loop flag set."""
        # Restore the previous flag afterwards so that the remainder of an
        # enclosing loop is still treated as being inside a loop.
        was_in_loop = self.is_in_loop
        self.is_in_loop = True
        self.visit(node.condition)
        self.visit(node.instruction)
        self.is_in_loop = was_in_loop

    def visit_RepeatInstr(self, node):
        """Visit a repeat loop's condition and body with the loop flag set."""
        was_in_loop = self.is_in_loop
        self.is_in_loop = True
        self.visit(node.condition)
        self.visit(node.instructions)
        self.is_in_loop = was_in_loop

    def visit_Return_instr(self, node):
        """Check that a return is inside a function and matches its declared type."""
        if self.current_function is None:
            print("Return placed outside of a function in line " + self.my_str(node.line))
        else:
            returned_type = self.visit(node.expression)
            if returned_type != self.current_function.type:
                print("Expected reutrn type " + self.my_str(self.current_function.type) + " actual" + self.my_str(returned_type)+ "in line: " + self.my_str(node.line))

    def visit_Fundef(self, node):
        """Register a function and visit its arguments and body in a new scope."""
        if self.symbol_table.get(node.id):
            print("Function " + node.id + "already defined" + "in line: " + self.my_str(node.line))
        else:
            function = FunctionSymbol(node.id, node.type, SymbolTable(self.symbol_table, node.id))
            self.symbol_table.put(node.id, function)
            self.current_function = function
            self.symbol_table = self.current_function.symbol_table
            if node.arg_list is not None:
                self.visit(node.arg_list)
            self.visit(node.compound_instr)
            self.symbol_table = self.symbol_table.getParentScope()
            self.current_function = None

    def visit_Arg(self, node):
        """Add a function argument to the current scope and to the current function."""
        if self.symbol_table.get(node.id) is not None:
            print("Double argument in function: " + node.id + "in line: " + self.my_str(node.line))
        else:
            self.symbol_table.put(node.id, VariableSymbol(node.id, node.type))
            self.current_function.put_arg(node.type)

    def visit_RelExpr(self, node):
        """Visit both operands of a relational expression (no type is returned yet)."""
        type1 = self.visit(node.left)     # type1 = node.left.accept(self)
        type2 = self.visit(node.right)    # type2 = node.right.accept(self)
        # ...
        #

    def visit_Integer(self, node):
        return 'int'

    def visit_Float(self, node):
        return 'float'

    def visit_String(self, node):
        return 'string'

    def visit_Init(self, node):
        """Check an initialiser against the type of the declaration being visited.

        An int value is accepted for a float declaration.
        """
        given_type = self.visit(node.expression)
        if given_type == self.current_type or (given_type == "int" and self.current_type == "float"):
            if self.symbol_table.get(node.id) is not None:
                print("The" +  node.id + "was already defined" + "in line: " + self.my_str(node.line))
            else:
                self.symbol_table.put(node.id, VariableSymbol(node.id, self.current_type))
        else:
            print("Forbidden type assignment " + self.my_str(given_type) + " to " + self.my_str(self.current_type)+ "in line: " + self.my_str(node.line))

    def visit_CompoundInstr(self, node):
        """Visit declarations and instructions of a block inside an "inner" scope."""
        self.symbol_table = SymbolTable(self.symbol_table, "inner")
        self.visit(node.declarations)
        self.visit(node.instructions_opt)
        self.symbol_table = self.symbol_table.getParentScope()

    def visit_ChoiceInstr(self, node):
        """Visit the condition and both branches (when present) of an if statement."""
        self.visit(node.condition)
        self.visit(node.instruction)
        if node.elseInstruction is not None:
            self.visit(node.elseInstruction)

    def visit_Assignment(self, node):
        """Check that the assigned value fits the variable's declared type.

        Same rule as ``visit_Init``: the types must match, except that an int
        may be assigned to a float.
        """
        definition = self.symbol_table.getGlobal(node.id)
        given_type = self.visit(node.expression)
        if definition is None:
            print("Used undefined symbol " + node.id + "in line: " + self.my_str(node.line))
        elif given_type != definition.type and not (definition.type == "float" and given_type == "int"):
            print("Bad assignment of " + self.my_str(given_type) + " to " +  self.my_str(definition.type) + "in line: " + self.my_str(node.line))

    def visit_Block(self, node):
        self.visit(node.block)

    def visit_Declaration(self,node):
        """Visit the initialisers of a declaration with ``current_type`` set to its type."""
        self.current_type = node.type
        self.visit(node.inits)
        self.current_type = ""

    def visit_ContinueInstr(self, node):
        """Report a continue that is not inside a loop."""
        if not self.is_in_loop:
            print("Continue instr used outside of loop")

    def visit_BreakInstr(self, node):
        """Report a break that is not inside a loop."""
        if not self.is_in_loop:
            print("Break instr used outside of loop")

    def visit_Program(self, node):
        self.visit(node.blocks)

    def my_str(self, s):
        """Return ``str(s)``, or the text 'None' when ``s`` is None."""
        return 'None' if s is None else str(s)
Example #24
0
class CompilationEngine():
    OPERATORS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']

    def __init__(self, token_file, output_file):
        """
            Creates a new compilation engine with
            the given input and output, then compiles
            the token file straight away via _compile.

            token_file: path of the token file to read.
            output_file: path of the file to generate; an
            existing file at that path is removed first.
        """
        # Start from a clean output file
        if os.path.exists(output_file):
            os.remove(output_file)

        self.input = open(token_file, 'r')
        # NOTE(review): this handle and the VMWriter below are both given
        # output_file - confirm both are needed.
        self.output = open(output_file, 'a+')
        # First line of the token file
        self.current_line = self.input.readline()
        # Replaced by a new SymbolTable in compileClass
        self.symbol_table = None
        self.code_writer = VMWriter(output_file)
        # Used by _generateLabel to number the labels
        self.label_counter = 0
        
        self._compile()

    def _compile(self):
        """
            Compiles the whole Jack program.
        """
        # Pula a primeira linha, que identifica o arquivo de tokens
        # Percorre o arquivo até o fim
        self.current_line = self.input.readline()
        while "</tokens>" not in self.current_line:
            self.compileClass()

    def _identify_key(self, line):
        tag_end = line.find('>')
        return line[1:tag_end]

    def _identify_value(self, line):
        first_tag_end = line.find('> ')
        last_tag_start = line.find(' </')
        return line[first_tag_end+2:last_tag_start]

    def _skipLine(self):
        self.current_line = self.input.readline()

    def _generateLabel(self):
        label = "L{}".format(self.label_counter)
        self.label_counter += 1
        return label

    def compileClass(self):
        """
            Compiles a complete class.
        """
        # Every new class gets a new symbol table
        self.symbol_table = SymbolTable()

        # Advance past the line <keyword> class </keyword>
        self._skipLine()
        # Record and advance past the class name <identifier> name </identifier>
        name = self._identify_value(self.current_line)
        self._skipLine()
        # Advance past the class opening symbol <symbol> { </symbol>
        self._skipLine()

        self.compileClassVarDec()
        self.compileSubroutineDec(name)

        # Advance past the class closing symbol <symbol> } </symbol>
        self._skipLine()

    def compileClassVarDec(self):
        """
            Compiles a static variable declaration,
            or a field declaration.
        """
        # Handle several consecutive variable declarations
        while self._identify_value(self.current_line) in ["var", "static", "field"]:
            # Record and advance past the declaration keyword
            kind = self._identify_value(self.current_line)
            self._skipLine()
            # Record and advance past the data type
            type = self._identify_value(self.current_line)
            self._skipLine()

            # Process the declaration until its last character is found
            while self._identify_value(self.current_line) != ';':
                if self._identify_key(self.current_line) != "symbol":
                    # If it is not a comma, it is a new variable name
                    # Record and advance past the name
                    name = self._identify_value(self.current_line)
                    self._skipLine()
                    # Add the variable to the symbol table
                    self.symbol_table.define(name, type, kind)
                else:
                    # If it is a comma, advance past the line
                    self._skipLine()

            # Advance past the last character ;
            self._skipLine()

    def compileSubroutineDec(self, class_name):
        """
            Compiles a complete method, function,
            or constructor.

            class_name: name of the enclosing class, used as
            the prefix of the emitted function name.
        """
        # Handle several consecutive methods or functions
        while self._identify_value(self.current_line) in [
                "method", "function", "constructor"
            ]:
            # Start a new subroutine scope in the symbol table
            self.symbol_table.startSubroutine()

            # Advance past the declaration <keyword> function </keyword>
            self._skipLine()
            # Record and advance past the return type <keyword> void </keyword>
            type = self._identify_value(self.current_line)
            self._skipLine()
            # Record and advance past the function name <identifier> name </identifier>
            name = self._identify_value(self.current_line)
            self._skipLine()
            # Advance past the start of the parameters <symbol> ( </symbol>
            self._skipLine()
            # Compile the parameter list and keep the number of parameters
            n_params = self.compileParameterList()
            # Advance past the end of the parameters <symbol> ) </symbol>
            self._skipLine()

            # Write the function declaration to the .vm file
            # NOTE(review): the count passed here is the number of parameters;
            # confirm that is what writeFunction expects (VM `function`
            # commands conventionally take the number of local variables).
            self.code_writer.writeFunction(
                "{}.{}".format(class_name, name),
                n_params
            )

            self.compileSubroutineBody()

    def compileParameterList(self):
        """
            Compiles a (possibly empty) parameter
            list. Does not handle the enclosing "()".
            Returns the number of parameters found.
        """
        parameters_count = 0

        # Process every line until the character that ends the parameters
        while self._identify_value(self.current_line) != ')':
            if self._identify_key(self.current_line) != "symbol":
                # Record and advance past the argument type <keyword> int </keyword>
                type = self._identify_value(self.current_line)
                self._skipLine()
                # Record the argument name <identifier> name </identifier>
                name = self._identify_value(self.current_line)
                self._skipLine()
                # Add the argument to the subroutine's symbol table
                self.symbol_table.define(name, type, "argument")
                # Increase the parameter count
                parameters_count += 1
            else:
                # Advance past the comma
                self._skipLine()

        return parameters_count

    def compileSubroutineBody(self):
        """
            Compiles a subroutine's body:
            the var declarations followed by the statements.
        """
        # Advance past the block opening <symbol> { </symbol>
        self._skipLine()

        self.compileVarDec()
        self.compileStatements()

        # Advance past the end of the block <symbol> } </symbol>
        self._skipLine()

    def compileVarDec(self):
        """
            Compiles a var declaration.
        """
        # Handle several consecutive variable declarations
        while self._identify_value(self.current_line) == "var":
            # Record and advance past the declaration keyword <keyword> var </keyword>
            kind = self._identify_value(self.current_line)
            self._skipLine()
            # Record and advance past the variable type <keyword> int </keyword>
            type = self._identify_value(self.current_line)
            self._skipLine()

            # Advance through the declaration until its last character is found
            while self._identify_value(self.current_line) != ';':
                if self._identify_key(self.current_line) != "symbol":
                    # If it is not a comma, it is a new variable name
                    # Record and advance past the variable name
                    name = self._identify_value(self.current_line)
                    self._skipLine()
                    # Add the variable to the symbol table
                    self.symbol_table.define(name, type, kind)
                else:
                    # Advance past the comma
                    self._skipLine()

            # Advance past the last character ;
            self._skipLine()

    def compileStatements(self):
        """
            Compiles a sequence os statements.
            Does not handle the enclosing "{}";
        """
        keyword = self._identify_value(self.current_line)

        # Verifica múltiplos statements
        while keyword in ["let", "if", "while", "do", "return"]:
            if keyword == "let":
                self.compileLet()
            elif keyword == "if":
                self.compileIf()
            elif keyword == "while":
                self.compileWhile()
            elif keyword == "do":
                self.compileDo()
            elif keyword == "return":
                self.compileReturn()

            keyword = self._identify_value(self.current_line)

    def compileLet(self):
        """
            Compiles a let statement.
        """
        # Advance past the keyword <keyword> let </keyword>
        self._skipLine()
        # Record and advance past the variable name <identifier> name </identifier>
        name = self._identify_value(self.current_line)
        self._skipLine()

        # A [ means an array access, which must contain an expression
        # NOTE(review): the index expression is compiled, but the pop at the
        # end still targets the variable itself - confirm array assignment
        # is handled as intended.
        if self._identify_value(self.current_line) == '[':
            # Advance past the opening bracket [
            self._skipLine()
            # Compile the expression
            self.compileExpression()
            # Advance past the closing bracket ]
            self._skipLine()

        # Advance past the assignment <symbol> = </symbol>
        self._skipLine()
        # Compile the expression
        self.compileExpression()
        # Advance past the end of the statement <symbol> ; </symbol>
        self._skipLine()

        # Store the result of the expression in the variable using pop
        kind = self.symbol_table.kindOf(name)
        index = self.symbol_table.indexOf(name)
        self.code_writer.writePop(kind, index)

    def compileIf(self):
        """
            Compiles an if statement,
            possibly with a trailing else clause.
        """
        else_label = self._generateLabel()
        end_label = self._generateLabel()

        # Advance past the keyword <keyword> if </keyword>
        self._skipLine()
        # Advance past the start of the expression <symbol> ( </symbol>
        self._skipLine()
        # Compile the condition expression
        self.compileExpression()
        # Advance past the end of the expression <symbol> ) </symbol>
        self._skipLine()

        # Negate the condition in the .vm file
        self.code_writer.writeArithmetic("~")
        # Jump to the else label in the .vm file
        self.code_writer.writeIf(else_label)

        # Start of the if block <symbol> { </symbol>
        self._skipLine()
        while self._identify_value(self.current_line) != '}':
            self.compileStatements()
        # Advance past the end of the block <symbol> } </symbol>
        self._skipLine()

        # Jump to the end of the if statement in the .vm file
        self.code_writer.writeGoto(end_label)
        # Write the else label to the .vm file
        self.code_writer.writeLabel(else_label)

        # Check whether there is an else block
        if self._identify_value(self.current_line) == "else":
            # Advance past the else <keyword> else </keyword>
            self._skipLine()
            # Advance past the start of the block <symbol> { </symbol>
            self._skipLine()
            # Compile the content of the block
            while self._identify_value(self.current_line) != '}':
                self.compileStatements()
            # Advance past the end of the block <symbol> } </symbol>
            self._skipLine()

        # Write the end-of-block label
        self.code_writer.writeLabel(end_label)

    def compileWhile(self):
        """
            Compiles a while statement.
        """
        # Create the 2 labels that are needed
        start_label = self._generateLabel()
        end_label = self._generateLabel()

        # Write the start label to the .vm file
        self.code_writer.writeLabel(start_label)

        # Advance past the start of the statement <keyword> while </keyword>
        self._skipLine()
        # Advance past the start of the expression <symbol> ( </symbol>
        self._skipLine()
        # Compile the condition expression
        self.compileExpression()

        # Negate the condition in the .vm file
        self.code_writer.writeArithmetic("~")
        # Test the condition and write an if-goto to the .vm file
        self.code_writer.writeIf(end_label)

        # Advance past the end of the expression <symbol> ) </symbol>
        self._skipLine()
        # Advance past the start of the block and continue until its end
        self._skipLine()
        # Compile the content of the while
        while self._identify_value(self.current_line) != '}':
            self.compileStatements()
        # Advance past the end of the block <symbol> } </symbol>
        self._skipLine()

        # Write a goto to the .vm file to go back to the start of the loop
        self.code_writer.writeGoto(start_label)
        # Write the end label used to leave the loop in the .vm file
        self.code_writer.writeLabel(end_label)


    def compileDo(self):
        """
            Compiles a do statement.
        """
        # Advance past the command <keyword> do </keyword>
        self._skipLine()

        # Collect every value up to the opening parenthesis; joined together
        # they form the name of the function being called
        name_parts = []
        while True:
            piece = self._identify_value(self.current_line)
            if piece == '(':
                break
            name_parts.append(piece)
            self._skipLine()
        function = "".join(name_parts)

        # Advance past the start of the expression list <symbol> ( </symbol>
        self._skipLine()
        # Compile the expression list and keep the value it returns
        n_args = self.compileExpressionList()
        # Advance past the end of the list <symbol> ) </symbol>
        self._skipLine()
        # Advance past the end of the statement <symbol> ; </symbol>
        self._skipLine()

        # Write the function call to the .vm file
        self.code_writer.writeCall(function, n_args)

        # A 'do' statement does not use the returned value, so pop it
        # into temp 0
        self.code_writer.writePop("temp", 0)

    def compileReturn(self):
        """
            Compiles a return statement.

            A bare `return;` pushes constant 0, because a return value is
            always expected on the stack. Anything else between `return`
            and `;` is compiled as an expression.
        """
        # Consume <keyword> return </keyword>
        self._skipLine()

        # Only a literal ";" symbol means "no return value". Testing for
        # "any symbol" would misread expressions that start with one,
        # such as `-x`, `~flag` or `(a + b)`.
        is_void = (
            self._identify_key(self.current_line) == "symbol"
            and self._identify_value(self.current_line) == ';'
        )
        if is_void:
            # Nothing is returned, but a return value is still expected,
            # so push 0
            self.code_writer.writePush("constant", 0)
        else:
            # Emit the returned expression
            self.compileExpression()
        # Consume <symbol> ; </symbol>
        self._skipLine()

        # Emit the return command
        self.code_writer.writeReturn()

    def compileExpression(self):
        """
            Compiles an expression: term (op term)*.

            Operators are applied strictly left to right, in the order
            they appear; no precedence is applied. Each operator is
            written after its right-hand term has been compiled.
        """
        # An expression always starts with a term
        self.compileTerm()

        # Keep consuming "op term" pairs for as long as the current token
        # is an operator, so chains such as `a + b + c` are fully compiled
        operator = self._identify_value(self.current_line)
        while operator in self.OPERATORS:
            # Consume the operator
            self._skipLine()
            # Emit the right-hand term
            self.compileTerm()
            # Emit the operation itself
            self.code_writer.writeArithmetic(operator)
            operator = self._identify_value(self.current_line)

    def compileTerm(self):
        """
            Compiles a term. If the current token
            is an identifier, the routine must
            distinguish between a variable , an
            array entry, or a subroutine call. A
            single look-ahead token, which may be one of
            "[", "(", or ".", suffices to distinguish
            between the possibilities. Any other token is
            not part of this term and should not be advanced
            over.

            Every branch that recognises a token also advances past it,
            so callers that loop until a closing symbol always make
            progress.
        """
        key = self._identify_key(self.current_line)

        if key == "identifier":
            # Either a plain variable, an array entry `var[expr]` or a
            # call `name.function(...)`: remember the identifier, advance,
            # and let the next token decide
            name = self._identify_value(self.current_line)
            self._skipLine()

            if self._identify_value(self.current_line) == '.':
                # A "." means a subroutine call: append and consume the dot
                name += "."
                self._skipLine()
                # Append and consume the subroutine name
                name += self._identify_value(self.current_line)
                self._skipLine()
                # Consume the opening (
                self._skipLine()
                # Emit the (possibly empty) argument list
                n_args = self.compileExpressionList()
                # Consume the closing )
                self._skipLine()
                # Emit the call
                # NOTE(review): the name is emitted exactly as written in
                # the source; no object reference is pushed when `name`
                # is a variable - confirm this matches the intended
                # calling convention.
                self.code_writer.writeCall(name, n_args)
            elif self._identify_value(self.current_line) == '[':
                # A "[" means an array access: consume the bracket
                self._skipLine()
                # Emit the index expression
                self.compileExpression()
                # Consume the closing ]
                self._skipLine()

                kind = self.symbol_table.kindOf(name)
                index = self.symbol_table.indexOf(name)
                # Push the array base, add the index, and read through
                # pointer 1 / that 0
                self.code_writer.writePush(kind, index)

                self.code_writer.writeArithmetic('+')
                self.code_writer.writePop('pointer', 1)
                self.code_writer.writePush('that', 0)
            else:
                # Plain variable: push it
                kind = self.symbol_table.kindOf(name)
                index = self.symbol_table.indexOf(name)
                self.code_writer.writePush(kind, index)
        elif key == "keyword":
            value = self._identify_value(self.current_line)
            if value == "true":
                # true is pushed as the bitwise negation of 0
                self.code_writer.writePush("constant", 0)
                self.code_writer.writeArithmetic('~')
            elif value in ("false", "null"):
                # false and null are both represented by 0
                self.code_writer.writePush("constant", 0)
            elif value == "this":
                # The current object's base address lives in pointer 0
                self.code_writer.writePush("pointer", 0)
            self._skipLine()
        elif key == "stringConstant":
            string = self._identify_value(self.current_line)

            # Allocate the string: String.new takes the length as its
            # single argument
            self.code_writer.writePush("constant", len(string))
            self.code_writer.writeCall("String.new", 1)

            # Append every character: 2 arguments each (string, char code)
            for char in string:
                self.code_writer.writePush("constant", ord(char))
                self.code_writer.writeCall("String.appendChar", 2)
            # Consume the string token so the caller does not see it again
            self._skipLine()
        elif key == "integerConstant":
            # Push the constant and consume it
            num = self._identify_value(self.current_line)
            self.code_writer.writePush("constant", num)
            self._skipLine()
        elif self._identify_value(self.current_line) == '(':
            # Parenthesised expression: consume (, emit, consume )
            self._skipLine()
            self.compileExpression()
            self._skipLine()
        elif self._identify_value(self.current_line) in ['-', '~']:
            # Unary operator: the operand term follows, and the operator
            # is written after it. Unary minus is written as 'neg'.
            op = self._identify_value(self.current_line)
            op = op if op == '~' else 'neg'
            self._skipLine()
            self.compileTerm()
            self.code_writer.writeArithmetic(op)

    def compileExpressionList(self):
        """
            Compiles a (possibly empty) comma-separated
            list of expressions.

            Stops with the closing ")" as the current token (it is not
            consumed) and returns the number of expressions compiled.
        """
        compiled = 0

        while True:
            token = self._identify_value(self.current_line)
            if token == ')':
                # End of the list: leave the parenthesis for the caller
                return compiled
            if token == ',':
                # Separator between two expressions: just consume it
                self._skipLine()
                continue
            # Anything else starts an expression
            self.compileExpression()
            compiled += 1
Example #25
0
class DecafSemanticChecker(DecafVisitor):
    """Parse-tree visitor that prints semantic errors found in a Decaf program.

    Errors are reported with ``print``; nothing is raised. The checks are:
    duplicate variable declarations, use of an undeclared variable in a
    statement, array sizes below 1, and method calls whose arguments do not
    match the declared formals.
    """

    def __init__(self):
        super().__init__()
        # Symbol table shared by every visit method
        self.st = SymbolTable()

    def visitProgram(self, ctx: DecafParser.ProgramContext):
        """Visit the whole program inside its own symbol-table scope."""
        self.st.enterScope()
        self.visitChildren(ctx)
        self.st.exitScope()

    def visitVar_decl(self, ctx: DecafParser.Var_declContext):
        """Register each declared variable, reporting redeclarations.

        Semantic rule: no identifier is declared twice in the same scope
        (test with testdata/semantics/illegal-01.dcf).
        """
        line_num = ctx.start.line
        for var_decl in ctx.ID():
            var_name = var_decl.getText()  # the variable name (e.g. x)
            # presumably probe() searches only the current scope - verify
            # against SymbolTable
            var_symbol = self.st.probe(var_name)

            if var_symbol is not None:
                # An entry already exists, so this is a redeclaration
                print('Error on line', line_num, 'variable \'', var_name,
                      '\' already declared on line', var_symbol.line)
            else:
                # NOTE(review): the type is always recorded as 'int',
                # whatever the declaration says.
                var_symbol = VarSymbol(id=var_name,
                                       type='int',
                                       line=line_num,
                                       size=8,
                                       mem=STACK)
                self.st.addSymbol(var_symbol)

        return self.visitChildren(ctx)

    def visitStatement(self, ctx: DecafParser.StatementContext):
        """Report a statement whose location names an undeclared variable.

        Semantic rule: no identifier is used before it is declared.
        """
        if ctx.location() is not None:
            line_num = ctx.start.line
            var_name = ctx.location().ID().getText()

            var_symbol = self.st.lookup(var_name)

            if var_symbol is None:
                print('Error on line', line_num, 'variable \'', var_name,
                      '\'is not declared')

        self.visitChildren(ctx)

    # semantic rule: warn the user that any method defined after the main method will never be executed.
    # (not implemented)

    def visitField_name(self, ctx: DecafParser.Field_nameContext):
        """Report an array field declared with a size smaller than 1.

        Semantic rule: int_literal in an array declaration must be greater
        than 0.
        """
        if ctx.int_literal() is not None:
            if int(ctx.int_literal().DECIMAL_LITERAL().getText()) < 1:
                line_num = ctx.start.line
                var_name = ctx.ID().getText()
                print("Error on line", line_num, "variable '", var_name,
                      "' array size must be greater than 0")

        return self.visitChildren(ctx)

    def visitMethod_decl(self, ctx: DecafParser.Method_declContext):
        """Record a method symbol carrying the declared parameter types.

        Supports semantic rule 5: the number and types of arguments in a
        method call must be the same as the number and types of the
        formals.
        """
        method_name = ctx.ID()[0].getText()
        method_return_type = ctx.return_type().getText()
        line_num = ctx.start.line
        # Parameter data types, as strings, in declaration order
        method_params = [data_type.getText() for data_type in ctx.data_type()]
        method_symbol = MethodSymbol(
            id=method_name,
            type=method_return_type,
            line=line_num,
            params=method_params)
        self.st.addSymbol(method_symbol)
        return self.visitChildren(ctx)

    def visitMethod_call(self, ctx: DecafParser.Method_callContext):
        """Check a method call against the formals of the declared method.

        Reports an undeclared method, a wrong argument count, and literal
        arguments whose kind does not match an 'int' or 'boolean' formal.
        Arguments that are not literals are not type-checked here.
        """
        line_num = ctx.start.line
        name_ctx = ctx.method_name()
        if name_ctx is None:
            # No method name on this call (presumably a callout - verify
            # against the grammar); there are no formals to compare with.
            return self.visitChildren(ctx)
        method_name = name_ctx.getText()

        method_symbol = self.st.lookup(method_name)
        if method_symbol is None:
            # Without a declaration there is nothing to compare against
            print("Error on line", line_num, "method '", method_name,
                  "' is not declared")
            return self.visitChildren(ctx)

        method_symbol_params = method_symbol.params
        arguments = ctx.expr()
        if len(arguments) != len(method_symbol_params):
            # As before, the children of a call with a wrong argument
            # count are not visited.
            return print(
                "Error you passed an incorrect combination of parameters",
                "on line", line_num,
                ", the number and types of arguments in a method call must be the same as the number and types of the formals"
            )

        # The counts are equal here, so the formals can be walked in
        # lockstep with the arguments
        for expected_type, argument in zip(method_symbol_params, arguments):
            if expected_type not in ('int', 'boolean'):
                print(
                    "missing method_symbol_params with data type classification:",
                    expected_type, " on line number",
                    line_num,
                    ", the number and types of arguments in a method call must be the same as the number and types of the formals"
                )
                continue

            literal = argument.literal()
            if literal is None:
                # Not a literal: its type cannot be decided from the
                # parse tree alone, so it is not checked
                continue

            if expected_type == 'int':
                if literal.int_literal() is None:
                    print(
                        "Error incorrect parameter data type expected",
                        expected_type, "received value",
                        literal.getText(), "on line",
                        line_num,
                        ", the number and types of arguments in a method call must be the same as the number and types of the formals"
                    )
            elif literal.bool_literal() is None:
                print(
                    "Error incorrect parameter date type expected",
                    expected_type, "received",
                    literal.getText(), "on line", line_num,
                    ", the number and types of arguments in a method call must be the same as the number and types of the formals"
                )

        return self.visitChildren(ctx)
Example #26
0
 def test_symbol_table(self):
     """Exercise define / var_count / index_of / type_of across a subroutine reset."""
     symbols = SymbolTable()

     # The same name may be defined once as static and once as argument
     symbols.define("x", "int", KIND_STATIC)
     symbols.define("x", "int", KIND_ARGUMENT)
     for kind in (KIND_STATIC, KIND_ARGUMENT):
         self.assertEqual(symbols.var_count(kind), 1)
     symbols.define("y", "int", KIND_VAR)
     self.assertEqual(symbols.var_count(KIND_VAR), 1)

     # After start_subroutine only the static count is still non-zero
     symbols.start_subroutine()
     for kind, expected in ((KIND_ARGUMENT, 0), (KIND_VAR, 0), (KIND_STATIC, 1)):
         self.assertEqual(symbols.var_count(kind), expected)

     # 'y' was defined before the reset, so asking for its type must fail
     symbols.define("x", "int", KIND_VAR)
     self.assertRaises(ValueError, symbols.type_of, 'y')

     # Indices are handed out in definition order, starting at 0
     symbols.define("x2", "int", KIND_VAR)
     symbols.define("x3", "int", KIND_VAR)
     for name, expected in (('x', 0), ('x2', 1)):
         self.assertEqual(symbols.index_of(name), expected)

     # 'x' is already a var here; defining it again as an argument must fail
     self.assertRaises(ValueError, symbols.define, "x", "char", KIND_ARGUMENT)
Example #27
0
"""
Assembler
Translates HACK assembly into HACK machine code.

@author: Kyle June
"""
import sys
from Parser import Parser
import Code
from SymbolTable import SymbolTable

# Path of the assembly source, taken from the first command-line argument.
asmFilename = sys.argv[1]

# First pass: walk every command and record each label in the symbol table.
# romAddress is only incremented for non-label commands, so each label is
# stored with the number of non-label commands seen before it.
parser = Parser(asmFilename)
symbolTable = SymbolTable()
romAddress = 0
while parser.hasMoreCommands():
    parser.advance()
    if parser.commandType() == "L_COMMAND":
        symbolTable.addEntry(parser.symbol(), romAddress)
    else:
        romAddress += 1

# Output file: the input name with its last three characters replaced by
# "hack" (assumes the input name ends in "asm" - TODO confirm).
# NOTE(review): the handle is not closed anywhere in the visible part of
# this script.
hackFilename = asmFilename[:-3] + "hack"
hackFile = open(hackFilename, "w")

# Second pass set-up: restart the parser before translating.
# NOTE(review): ramAddress is not used in the visible part of the script;
# presumably it is the next address handed to a new variable - confirm.
parser.restart()
ramAddress = 16
class DecafCodeGenVisitor(DecafVisitor):
    """Parse-tree visitor that builds x86-64 assembly text (AT&T syntax).

    ``head`` accumulates the ``.data`` section and ``body`` the code.
    Semantic problems are reported with ``print``; nothing is raised.

    NOTE(review): several constructs still emit assembly that cannot be
    correct as written (see the NOTE(review) comments below). They are left
    alone because the intended code cannot be determined from this class.
    """

    # Counters that make generated label and string names unique.
    IF_LABEL_COUNT = 1
    CALLOUT_COUNT = 1
    LOOP_COUNT = 1

    def __init__(self):
        """Create the symbol table and the two output sections."""
        super().__init__()
        self.st = SymbolTable()
        self.head = '.data\n'
        self.body = '.global main\n'

    def visitProgram(self, ctx:DecafParser.ProgramContext):
        """Visit the program, then check that a parameterless main exists."""
        self.st.enterScope()
        self.visitChildren(ctx)
        method_symbol = self.st.lookup('main')

        if method_symbol is None:
            print('[Error]: No main method has been declared.')
        # Check the parameters recorded for main itself; a symbol without
        # a ``params`` attribute counts as having none.
        elif getattr(method_symbol, 'params', None):
            print('[Error]: The main method cannot contain paramaters.')
        self.body += 'ret\n'
        self.st.exitScope()

    def visitMethod_decl(self, ctx:DecafParser.Method_declContext):
        """Emit the method label and register the method and its parameters."""
        method_name = ctx.ID(0).getText()
        return_type = ctx.TYPE(0)
        line_number = ctx.start.line

        if self.st.probe(method_name) is not None:
            # line_number is an int and must be converted before joining
            print('[Error]: The method ' + method_name + ' on line: ' + str(line_number) + ' was already declared!')
        else:
            self.body += method_name
            self.body += ':\n'

        # The first ID is the method name; every later ID is a parameter.
        # NOTE(review): parameters are added to whatever scope is current
        # here, because no scope is entered before the method block.
        params = []
        for param_node in ctx.ID()[1:]:
            param_name = param_node.getText()
            params.append(param_name)
            if self.st.probe(param_name) is None:
                var_symbol = VarSymbol(id=param_name, type='int', line=ctx.start.line, size=8, mem=self.st.stack_pointer)
                self.st.addSymbol(var_symbol)
                var_addr = var_symbol.getAddr()
                self.body += '\tmovq %rax, -' + str(var_addr[0]) + '(%rsp)\n'

        method_symbol = MethodSymbol(id=method_name, type=return_type, line=line_number, params=params)
        self.st.addSymbol(method_symbol)

        return self.visitChildren(ctx)

    def visitBlock(self, ctx:DecafParser.BlockContext):
        """Visit a block inside its own scope."""
        self.st.enterScope()
        visit = self.visitChildren(ctx)
        self.st.exitScope()
        return visit

    def visitExpr(self, ctx:DecafParser.ExprContext):
        """Emit code that leaves the value of an expression in %rax."""
        if ctx.location():
            # Variable read; an array access is looked up by its base name
            var_name = ctx.location().getText()
            var_symbol = self.st.lookup(var_name.split('[', 1)[0])
            if var_symbol is None:
                print('[Error]: Variable', var_name, 'has not been declared. Found on line', ctx.start.line)
            else:
                var_addr = var_symbol.getAddr()
                self.body += '\tmovq -' + str(var_addr[0]) + '(%rsp), %rax\n'

        elif ctx.literal():
            # Literal: booleans become 1 / 0, everything else is emitted
            # as written
            number = ctx.literal().getText()
            if number == 'false':
                number = '0'
            if number == 'true':
                number = '1'
            self.body += '\tmovq $' + number + ', %rax\n'

        elif len(ctx.expr()) > 1:
            # Binary expression: evaluate the left operand and spill it
            self.visit(ctx.expr(0))
            self.st.stack_pointer[-1] += 8
            self.body += '\tmovq %rax, ' + str(-self.st.stack_pointer[-1]) + '(%rsp)\n'

            # Evaluate the right operand; afterwards %r10 holds the left
            # value and %r11 the right value
            self.visit(ctx.expr(1))
            self.body += '\tmovq ' + str(-self.st.stack_pointer[-1]) + '(%rsp), %r10\n'
            self.st.stack_pointer[-1] -= 8
            self.body += '\tmovq %rax, %r11\n'

            # Every operator leaves its result in %r11
            if ctx.BIN_OP():
                operator = str(ctx.BIN_OP())
                if operator == '+':
                    self.body += '\taddq %r10, %r11\n'
                elif operator == '*':
                    self.body += '\timul %r10, %r11\n'
                elif operator == '-':
                    # AT&T `subq src, dst` computes dst - src, so subtract
                    # the right operand from the left one
                    self.body += '\tsubq %r11, %r10\n'
                    self.body += '\tmovq %r10, %r11\n'
                elif operator == '/':
                    # idiv divides %rdx:%rax by its operand and leaves the
                    # quotient in %rax
                    # NOTE(review): this clobbers %rbx.
                    self.body += '\tmovq %r11, %rbx\n'
                    self.body += '\tmovq %r10, %rax\n'
                    self.body += '\tcqto\n'
                    self.body += '\tidiv %rbx\n'
                    self.body += '\tmovq %rax, %r11\n'

            self.body += '\tmovq %r11, %rax\n'

    def visitVar_decl(self, ctx:DecafParser.Var_declContext):
        """Register every variable of a declaration (e.g. ``int x, y, z``)."""
        for id_node in ctx.ID():
            var_name = id_node.getText()
            if "[" in var_name:
                # Names containing a bracket are not registered here
                continue
            var_symbol = self.st.probe(var_name)
            if var_symbol is None:
                var_symbol = VarSymbol(id=var_name, type='int', line=ctx.start.line, size=8, mem=self.st.stack_pointer)
                self.st.addSymbol(var_symbol)
                var_addr = var_symbol.getAddr()
                self.body += '\tmovq %rax, -' + str(var_addr[0]) + '(%rsp)\n'
            else:
                print('[Error]:', var_symbol.id + ', declared on line', ctx.start.line, 'has already been declared on line', var_symbol.line)

        return self.visitChildren(ctx)

    def visitStatement(self, ctx:DecafParser.StatementContext):
        """Emit code for continue, break, if, return and for statements."""
        # NOTE(review): continue and break both jump to main, not to the
        # enclosing loop's labels.
        if ctx.CONTINUE() is not None:
            self.body += '\tjmp main\n'
        if ctx.BREAK() is not None:
            self.body += '\tjmp main\n'
        if ctx.IF():
            self.st.enterScope()
            if_label = 'if-label-'+str(self.IF_LABEL_COUNT)
            # NOTE(review): the three jump targets (suffixes l / e / g)
            # are never defined; only the unsuffixed label is emitted.
            # Label names also contain '-', which GNU as may not accept.
            self.body += '\tcmp %r11, %r10\n'
            self.body += '\tjl '+if_label+'l\n'
            self.body += '\tje '+if_label+'e\n'
            self.body += '\tjg '+if_label+'g\n'
            self.body += '\tret\n'
            self.body += if_label+':\n'
            self.IF_LABEL_COUNT = self.IF_LABEL_COUNT + 1
            self.st.exitScope()
        if ctx.RETURN():
            if ctx.expr():
                # NOTE(review): the expression text is emitted as an
                # immediate, which only works for numeric literals.
                return_value = str(ctx.expr(0).getText())
                self.body += '\tmovq $'+return_value+', %rax\n'
                self.body += '\tret\n'
            else:
                self.body += '\tret\n'
        if ctx.FOR():
            self.st.enterScope()
            # Take the loop number before visiting the body so a nested
            # loop gets its own labels
            loop_id = str(self.LOOP_COUNT)
            self.LOOP_COUNT = self.LOOP_COUNT + 1
            end_value = ctx.expr(1)
            # Use the source text of the bound, not the repr of the
            # context object
            end_text = end_value.getText() if end_value is not None else str(end_value)
            # NOTE(review): the counter always starts at 1; the start
            # expression (ctx.expr(0)) is not used.
            self.body += '\tmovq $1, %rbx\n'
            self.body += '\tjmp begin-for-'+loop_id+'\n'
            self.body += 'begin-for-'+loop_id+':\n'
            self.body += '\tcmp $'+end_text+ ', %rbx\n'
            self.body += '\tjge end-for-'+loop_id+'\n'
            visit = self.visitChildren(ctx)
            self.body += '\taddq $1, %rbx\n'
            self.body += '\tjmp begin-for-'+loop_id+'\n'
            self.body += 'end-for-'+loop_id+':\n'
            self.body += '\tret\n'
            self.st.exitScope()
            # The children were visited inside the loop; visiting them
            # again would emit the body a second time
            return visit

        return self.visitChildren(ctx)

    def visitField_decl(self, ctx:DecafParser.Field_declContext):
        """Register every field of a declaration, arrays included."""
        for field_ctx in ctx.field_name():
            # An array field is stored under its base name, so it must be
            # probed under that name too
            var_name = field_ctx.getText().split('[', 1)[0]
            var_symbol = self.st.probe(var_name)
            if var_symbol is None:
                # NOTE(review): arrays get the same size (8) as scalars.
                var_symbol = VarSymbol(id=var_name, type='int', line=ctx.start.line, size=8, mem=self.st.stack_pointer)
                self.st.addSymbol(var_symbol)
                var_addr = var_symbol.getAddr()
                self.body += '\tmovq %rax, -' + str(var_addr[0]) + '(%rsp)\n'
            else:
                print('[Error]:', var_symbol.id + ', declared on line', ctx.start.line, 'has already been declared on line', var_symbol.line)
        return self.visitChildren(ctx)

    def visitMethod_call(self, ctx:DecafParser.Method_callContext):
        """Emit a jump to a declared method, or report an unknown one."""
        name_ctx = ctx.method_name()
        # Calls without a method name or with callout arguments are
        # handled by visitCallout_arg
        if name_ctx is not None and not ctx.callout_arg():
            # Look up by text: the context object is not the symbol name
            method_name = name_ctx.getText()
            if self.st.lookup(method_name) is None:
                print('[Error]: Call to a function that does not exist: ' + str(method_name) + ' on line: ' + str(ctx.start.line))
            else:
                # NOTE(review): this is a jmp, not a call, so the callee's
                # ret does not return here - confirm the intent.
                self.body += '\tjmp '+method_name+'\n'
        return self.visitChildren(ctx)

    def visitCallout_arg(self, ctx:DecafParser.Callout_argContext):
        """Store a string argument in .data and emit a printf call for it."""
        literal = ctx.STRING_LITERAL()
        if literal is not None:
            label = 'string'+str(self.CALLOUT_COUNT)
            self.head += label+': .asciz '+str(literal)+'\n'
            # Load the address of the string label
            self.body += '\tmovq $'+label+', %rdi\n'
            self.body += '\tsubq $8, %rsp\n'
            self.body += '\tcall printf\n'
            self.body += '\taddq $8, %rsp\n'
            self.CALLOUT_COUNT = self.CALLOUT_COUNT + 1

        return self.visitChildren(ctx)
 def __init__(self):
     """Create the symbol table and seed the two assembly text buffers."""
     super().__init__()
     self.st = SymbolTable()
     # head starts the .data section, body starts with the main export
     self.head, self.body = '.data\n', '.global main\n'
Example #30
0
class Parser:
    """
    Encapsulates access to the input code.
    Reads an assembly language command, parses it, and provides convenient
    access to the commands components (fields and symbols).
    In addition, removes all white space and comments.
    """

    # init -> line sectioning -> read_labels -> read_A / read_C

    def __init__(self, file_name: str):
        """
        Opens the input file/stream and gets ready to parse it.

        The whole file is read at once and the handle is closed before
        parsing starts; ``self.file`` keeps referring to the (now closed)
        file object.
        :param file_name: file name to open
        """
        with open(file_name) as source:
            self.file = source
            self.lines = source.readlines()
        self.clean_comments()

        self.current_line_index = 0
        # A file with no remaining lines has no current line
        self.current_line = self.lines[0] if self.lines else None
        self.line_amount = len(self.lines)

        self.binary_lines = list()
        self.symbol_table = SymbolTable()
        self.labels = dict()

        self.next_address = FIRST_ADDRESS

    def clean_comments(self):
        """
        method to clean the comments from the code lines
        :return: none
        """
        new_lines = list()
        for line in self.lines:
            if ((not line.startswith("//")) & (not line.isspace()) &
                (not line.startswith("/*") & (not line.startswith("*/")))):
                new_lines.append(line)
        self.lines = new_lines

    def parse(self):
        """
        method to parse out the file
        :return: none
        """
        self.read_labels()
        self.read_instructions()
        return self.binary_lines

    def strip_line(self):
        """
        method to strip \n or \t or spaces in line
        :return: none
        """
        self.current_line = self.current_line.strip()
        self.current_line = self.current_line.rstrip('\n')
        self.current_line = self.current_line.rstrip('\t')
        self.current_line = (self.current_line.split("//"))[0]
        self.current_line = self.current_line.replace(" ", "")

    def line_sectioning(self):
        """
        method to section lines of file by variable lines or label lines
        :return: none
        """
        # start sectioning
        while self.current_line is not None:
            # stripping from whitespaces and end line
            self.strip_line()
            self.lines[self.current_line_index] = self.current_line
            self.advance()

    def has_more_commands(self):
        """
        Are there more commands in the input?
        :return: true if has, false otherwise
        """
        if self.current_line_index + 1 < self.line_amount:
            return True
        return False

    def advance(self):
        """
        Reads the next command from
        the input and makes it the current command. Should be called only
        if hasMoreCommands() is true. Initially there is no current command.
        :return: none
        """
        if not self.has_more_commands():
            self.current_line = None
        while self.has_more_commands() & (self.current_line_index + 1 <
                                          self.line_amount):
            self.current_line_index = self.current_line_index + 1
            self.current_line = self.lines[self.current_line_index]
            # we shall not ignore:
            if not self.check_ignored_line():
                break

    def check_ignored_line(self):
        """
        check if a line should be ignored
        :return: true for ignored, false otherwise
        """
        if self.current_line.isspace():
            return True
        elif self.current_line.startswith(COMMENT):
            return True
        else:
            return False

    def get_address_for_symbol(self):
        """
        get next free address for a symbol in symbol table
        :return: int of address
        """
        address = self.next_address
        symbol_table = self.symbol_table

        while symbol_table.is_occupied(address):
            address = address + 1
        self.next_address = address + 1
        return address

    def read_labels(self):
        """
        read all labels inside the asm file
        and update symbol table accordingly
        :return: none
        """
        index = 0
        for line in self.lines:
            # if we are on a label:
            if Parser.line_command_type(line) == CommandType.L_COMMAND:
                # remove "(" and ")" from beginning and end.
                length_of_name = len(line)
                label_name = line[1:length_of_name - 1]
                # add to symbol table
                binary = Parser.decimal_to_binary(index)
                self.labels[label_name] = binary
                # label index is the number of line
                # the binary code of the label is its address
            else:
                index = index + 1

    @staticmethod
    def from_array_to_string(binary_string):
        """
        method to turn from an array of {0,1} into a string of {1,0}
        :param binary_string: binary array string
        :return: binary string
        """
        binary = ""
        binary = binary.join(binary_string)
        return binary

    def read_instructions(self):
        """
        Translate every A and C instruction of the asm file to binary text.

        Each translated line is appended to ``self.binary_lines``; lines of
        any other type produce no output.
        :return: none
        """
        for instruction in self.lines:
            kind = Parser.line_command_type(instruction)
            if kind == CommandType.A_COMMAND:
                bits = self.read_A_instruction(instruction)
            elif kind == CommandType.C_COMMAND:
                bits = self.read_C_instruction(instruction)
            else:
                continue
            self.binary_lines.append(Parser.from_array_to_string(bits))

    def read_A_instruction(self, line: str):
        """
        Translate an A instruction into its binary string.

        The text after the first character is resolved in this order:
        decimal literal, symbol already in the symbol table, label found in
        ``self.labels``, and finally a new symbol, which is given the next
        free address and added to the symbol table.
        :param line: the A instruction, including its leading marker
        :return: binary code of line
        """
        symbol = line[1:]
        if line[1].isdecimal():
            bits = Parser.decimal_to_binary(int(symbol))
        elif self.symbol_table.contains(symbol):
            known_address = self.symbol_table.get_address(symbol)
            bits = Parser.decimal_to_binary(known_address)
        elif symbol in self.labels:
            bits = self.labels.get(symbol)
        else:
            new_address = self.get_address_for_symbol()
            self.symbol_table.add_entry(symbol, new_address)
            bits = Parser.decimal_to_binary(new_address)
        return Parser.from_array_to_string(bits)

    @staticmethod
    def read_C_instruction(line: str):
        """
        Translate a C instruction into its binary string.

        The line is split into dest, comp and jump, and the three looked-up
        bit groups are concatenated in the order comp, dest, jump.
        :param line: the C instruction text
        :return: binary code of line
        """
        dest, comp, jump = Parser.parse_C_command(line)
        # COMP, DESTINATIONS, JUMPS are dictionaries of fitting given commands
        return "".join((COMP[comp], DESTINATIONS[dest], JUMPS[jump]))

    def read_L_instruction(self, label_name: str):
        """
        Look a label up in the symbol table and encode its address.
        :param label_name: name of the label to resolve
        :return: binary code of line
        """
        bits = Parser.decimal_to_binary(
            self.symbol_table.get_address(label_name))
        return Parser.from_array_to_string(bits)

    @staticmethod
    def decimal_to_binary(address):
        """
        Spell an integer as a list of SIXTEEN '0'/'1' characters.

        The binary digits are right-aligned and the remaining leading
        positions stay '0' (for values whose digits fit in SIXTEEN places).
        :param address: integer to encode
        :return: list of single-character strings
        """
        # strip the "0b" prefix that bin() produces
        digits = bin(address)[2:]
        word = ['0'] * SIXTEEN
        # overwrite the tail of the zero-filled word with the digits
        word[SIXTEEN - len(digits):SIXTEEN] = digits
        return word

    def command_type(self):
        """
        Classify the current line.

        - L_COMMAND when it starts with LABEL_START, i.e. (Xxx)
        - A_COMMAND when it starts with A_COMMAND_START, i.e. @Xxx
        - C_COMMAND otherwise, i.e. dest=comp;jump
        :return: enum of command type
        """
        current = self.current_line
        if current.startswith(LABEL_START):
            return CommandType.L_COMMAND
        if current.startswith(A_COMMAND_START):
            return CommandType.A_COMMAND
        # already deleted comments, so no need to check for that
        return CommandType.C_COMMAND

    @staticmethod
    def line_command_type(line: str):
        """
        Classify a command from the text of the line itself.

        - A_COMMAND when it starts with A_COMMAND_START, i.e. @Xxx
        - L_COMMAND when it starts with LABEL_START, i.e. (Xxx)
        - C_COMMAND otherwise, i.e. dest=comp;jump
        :param line: the line to classify
        :return: enum of command type
        """
        prefixes = (
            (A_COMMAND_START, CommandType.A_COMMAND),
            (LABEL_START, CommandType.L_COMMAND),
        )
        for prefix, kind in prefixes:
            if line.startswith(prefix):
                return kind
        return CommandType.C_COMMAND

    @staticmethod
    def parse_C_command(line):
        """
        parsing a c command method, into 3 sections- dest, comp and jmp.
        :param line: given line to parse into a c command
        :return: a triplet of (dest, comp, jump)
        """
        # line looks like dest=comp;jmp

        # gives us [dest=comp],[jmp]
        # in an array
        # then we split second array to
        # [dest][comp]
        split_by_comma = line.split(';')
        split_by_equal = split_by_comma[0].split('=')
        if len(split_by_comma) == 1:
            # no jmp
            return split_by_equal[0], split_by_equal[1], None
        elif len(split_by_equal) == 1:
            # no dest
            return None, split_by_comma[0], split_by_comma[1]
        else:
            # dest and jmp
            return split_by_equal[0], split_by_equal[1], split_by_comma[1]
Example #31
0
class CompilationEngine:
    def __init__(self, tokenizer: JackTokenizer, jack_file):
        """
        Set up the engine for one source file.

        Two output files are opened for writing; their paths are derived
        from ``jack_file.name`` by replacing '.jack' with '_engine.xml'
        (parse-tree log) and with '.vm' (generated VM code).
        """
        self.tokenizer = tokenizer
        self.class_name = ''
        source_name = jack_file.name
        self.log_file = open(source_name.replace('.jack', '_engine.xml'), 'w')
        self.output_file = open(source_name.replace('.jack', '.vm'), 'w')
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(self.output_file)
        # counters that keep generated while/if labels distinct
        self.while_label_index = 0
        self.if_label_index = 0

    def compile(self):
        """Compile the source, starting at the class with indentation 0."""
        root_indentation = 0
        self.compile_class(root_indentation)

    def advance(self):
        """Call the tokenizer's ``advance()`` and hand back its result."""
        token = self.tokenizer.advance()
        return token

    def next(self) -> Token:
        """Call the tokenizer's ``next()`` and hand back its result."""
        upcoming = self.tokenizer.next()
        return upcoming

    def compile_token(self, token, indentation, limits=None):
        """
        Echo a token, optionally validate it, then write it to the log.

        :param token: token to emit
        :param indentation: nesting depth passed on to ``log``
        :param limits: a list restricts ``token.token_type`` to its members;
            a str requires ``token.content`` to equal it; anything else
            (including None) disables the check
        :raises RuntimeError: when the token violates ``limits``
        """
        print(token.content, end='  ')
        if isinstance(limits, list):
            allowed = token.token_type in limits
        elif isinstance(limits, str):
            allowed = token.content == limits
        else:
            allowed = True
        if not allowed:
            raise RuntimeError(token, 'can be only', limits)
        self.log(token, indentation)

    def log_node(self, msg, indentation):
        """Write ``<msg>`` to the log file, indented two spaces per level."""
        padding = '  ' * indentation
        self.log_file.write('{1}<{0}>\n'.format(msg, padding))

    def log(self, token, indentation):
        """
        Write one token to the log file as ``<type> content </type>``.

        A content that is exactly one of ``<``, ``>``, ``"`` or ``&`` is
        replaced by its XML entity; the line is indented two spaces per
        level.
        """
        text = token.content
        entities = (('<', '&lt;'), ('>', '&gt;'), ('"', '&quot;'),
                    ('&', '&amp;'))
        for raw, escaped in entities:
            if text == raw:
                text = escaped
                break
        padding = '  ' * indentation  # 2 spaces per level
        self.log_file.write('{2}<{0}> {1} </{0}>\n'.format(
            token.token_type, text, padding))

    def compile_class(self, indentation):
        """
        Compile a complete class.

        Consumes 'class', the class name and '{', then dispatches every
        member to the subroutine or class-variable compiler until the
        closing '}' is reached.
        :raises RuntimeError: when a member starts with anything other than
            constructor/function/method/field/static
        """
        self.log_file.write('<class>\n')
        inner = indentation + 1
        # 'class'
        self.compile_token(self.advance(), inner)
        # class name
        name_token = self.advance()
        self.class_name = name_token.content
        self.compile_token(name_token, inner)
        # set class name to vm-writer
        self.vm_writer.set_class_name(name_token.content)
        # {
        self.compile_token(self.advance(), inner, "{")
        # classVarDec* subroutineDec*
        member = self.advance()
        while member.content != '}':
            if member.content in ('constructor', 'function', 'method'):
                self.compile_subroutine(member, inner)
            elif member.content in ['field', 'static']:
                self.compile_class_var_dec(member, inner)
            else:
                raise RuntimeError(
                    member,
                    'Only subroutine and variable can be declared here')
            member = self.advance()
        # }
        self.compile_token(member, inner, '}')
        self.log_file.write('</class>\n')
        self.log_file.flush()
        print("\ncompilation success")

    def compile_class_var_dec(self, token, indentation):
        """
        Compile a static declaration or a field declaration.

        ``token`` is the 'static'/'field' keyword; it is passed in because
        the caller has already advanced onto it. Every declared name is
        defined in the symbol table with the upper-cased keyword as kind.
        """
        self.log_node('classVarDec', indentation)
        inner = indentation + 1
        # static or field
        kind = token.content.upper()
        self.compile_token(token, inner)
        # type
        type_token = self.advance()
        var_type = type_token.content
        self.compile_token(type_token, inner, [IDENTIFIER, KEYWORD])
        # first var name
        name_token = self.advance()
        self.compile_token(name_token, inner, [IDENTIFIER])
        self.symbol_table.define(name_token.content, var_type, kind)
        # further names share the same type and kind
        separator = self.advance()
        while separator.content == ',':
            self.compile_token(separator, inner, ',')
            name_token = self.advance()
            self.symbol_table.define(name_token.content, var_type, kind)
            self.compile_token(name_token, inner, [IDENTIFIER])
            separator = self.advance()
        # ;
        self.compile_token(separator, inner, ';')
        self.log_node('/classVarDec', indentation)

    def compile_subroutine(self, token, indentation):
        """
        Compile a complete method, function, or constructor.

        ``token`` is the 'method'/'function'/'constructor' keyword. For a
        method, a placeholder ARG entry is defined first so that declared
        parameters are numbered after it.
        """
        # reset symbol table for subroutine
        self.symbol_table.start_subroutine()

        self.log_node('subroutineDec', indentation)
        inner = indentation + 1
        # function/method/constructor
        function_type = token.content
        self.compile_token(token, inner)
        # void | type
        self.compile_token(self.advance(), inner)
        # subroutine name
        name_token = self.advance()
        subroutine_name = name_token.content
        self.compile_token(name_token, inner)
        # (
        self.compile_token(self.advance(), inner)
        if function_type == 'method':
            self.symbol_table.define('this_placeholder', "THIS", ARG)
        # parameter list; it does not consume the closing ')'
        first_param = self.advance()
        self.compile_parameter_list(first_param, inner)
        # with an empty list the token just read already is the ')'
        closing = first_param if first_param.content == ')' else self.advance()
        self.compile_token(closing, inner, ')')
        #  {
        self.compile_subroutine_body(self.advance(), inner, subroutine_name,
                                     function_type)
        self.log_node('/subroutineDec', indentation)

    def compile_subroutine_body(self,
                                token,
                                indentation,
                                subroutine_name,
                                function_type='function'):
        """
        Compile a subroutine body: '{' varDec* statements '}'.

        Emits the function header once the local-variable count is known,
        followed by the ``this`` setup for constructors and methods.
        :param token: the opening '{'
        :param subroutine_name: name written into the function header
        :param function_type: 'function', 'method' or 'constructor'
        """
        self.log_node('subroutineBody', indentation)
        inner = indentation + 1
        self.compile_token(token, inner, '{')
        token = self.advance()
        n_locals = 0
        if token.content == 'var':
            n_locals = self.compile_var_dec(token, inner)
            token = self.advance()
        self.vm_writer.write_functions(subroutine_name, n_locals)

        # todo: handle constructor
        if function_type == 'constructor':
            # allocate one word per field and point `this` at the block
            field_count = self.symbol_table.var_count(FIELD)
            self.vm_writer.write_push('CONST', field_count)
            self.vm_writer.write_call('Memory.alloc', 1)
            self.vm_writer.write_pop('POINTER', 0, 'set this pointer')
        elif function_type == 'method':
            # if it is a method, always set arg 0 to pointer 0(this)
            self.vm_writer.write_push(ARG, 0)
            self.vm_writer.write_pop('POINTER', 0)

        if token.content != '}':
            self.compile_statements(token, inner)
            token = self.advance()
        else:
            # TODO: handle empty function body
            print('empty body', token)
        self.compile_token(token, inner, '}')
        self.log_node('/subroutineBody', indentation)

    def compile_parameter_list(self, token, indentation):
        """
        Compile a (possibly empty) parameter list, not including the
        enclosing "()".

        Every parameter is defined in the symbol table as an ARG symbol.
        The closing ')' is left for the caller to consume.
        """
        self.log_node('parameterList', indentation)
        inner = indentation + 1
        while token.content != ')':
            param = Symbol()
            param.kind = ARG
            # parameter type
            self.compile_token(token, inner, [IDENTIFIER, KEYWORD])
            param.symbol_type = token.content
            # parameter name
            token = self.advance()
            self.compile_token(token, inner, [IDENTIFIER, KEYWORD])
            param.name = token.content
            self.symbol_table.define_symbol(param)
            if self.next() is not None and self.next().content == ',':
                # consume ',' and move on to the next parameter's type
                token = self.advance()
                self.compile_token(token, inner)
                token = self.advance()
            elif self.next() is not None and self.next().content == ')':
                # this function does not consume ')', so no advance() here
                break
            else:
                token = self.advance()
        self.log_node('/parameterList', indentation)

    def compile_var_dec(self, token, indentation) -> int:
        """
        Compile consecutive ``var`` declarations.

        Every declared name is registered in the symbol table as VAR.
        :param token: the first 'var' keyword
        :return: total number of variables declared
        """
        declared = 0
        while token.content == 'var':
            self.log_node('varDec', indentation)
            inner = indentation + 1
            declared += 1
            first = Symbol()
            # var
            self.compile_token(token, inner, 'var')
            first.kind = VAR
            # var type
            token = self.advance()
            self.compile_token(token, inner, [IDENTIFIER, KEYWORD])
            first.symbol_type = token.content
            # var name
            token = self.advance()
            self.compile_token(token, inner, [IDENTIFIER, KEYWORD])
            first.name = token.content
            self.symbol_table.define_symbol(first)
            # next token may be ',' or ';'
            token = self.advance()
            while token.content == ',':
                declared += 1
                self.compile_token(token, inner, ',')
                # var name
                token = self.advance()
                self.compile_token(token, inner, [IDENTIFIER])
                # only name differs, types are the same
                self.symbol_table.define(token.content, first.symbol_type,
                                         VAR)
                token = self.advance()
            if token.content == ';':
                self.compile_token(token, inner, ';')
            # keep looping only when another declaration follows directly
            if self.next().content == 'var':
                token = self.advance()
            self.log_node('/varDec', indentation)
        return declared

    def compile_statements(self, token, indentation):
        """
        Compile a sequence of statements, not including the enclosing "{}".

        Each statement is dispatched on its leading keyword. The closing
        '}' is only peeked at, never consumed, so the caller can handle it.
        :param token: first token of the first statement
        :param indentation: nesting depth used for the XML log
        :raises RuntimeError: when a statement starts with an unknown
            token; the message names the offending token
        """
        self.log_node('statements', indentation)
        handlers = {
            'let': self.compile_let,
            'if': self.compile_if,
            'while': self.compile_while,
            'do': self.compile_do,
            'return': self.compile_return,
        }
        while token.content != '}':
            handler = handlers.get(token.content)
            if handler is None:
                # the placeholder used to be emitted verbatim; fill it in
                raise RuntimeError(
                    'unknown type in statements %s' % (token.content,))
            handler(token, indentation + 1)
            if self.next() is not None and self.next().content == '}':
                break
            token = self.advance()
        self.log_node('/statements', indentation)
        return

    def compile_do(self, token: Token, indentation):
        """
        Compile a do statement: 'do' subroutineCall ';'.

        The call itself is compiled as a term (with the <term> log nodes
        suppressed); the value it leaves behind is popped to temp 0.
        """
        self.log_node('doStatement', indentation)
        inner = indentation + 1
        self.compile_token(token, inner, 'do')
        call_start = self.advance()
        self.compile_term(call_start, inner, do_term=True)
        # a do statement ignores the returned value, so discard it
        self.vm_writer.write_pop('TEMP', 0, 'do call')
        self.compile_token(self.advance(), inner, ';')
        self.log_node('/doStatement', indentation)

    def compile_let(self, token: Token, indentation):
        """
        Compile a let statement: 'let' varName ('[' expr ']')? '=' expr ';'.

        Example: let length = Keyboard.readInt("HOW MANY NUMBERS? ");

        For an array target the element address (index + base) is left on
        the stack while the right-hand side is evaluated. It used to be
        parked in temp 2, which any call made by the right-hand side could
        overwrite with its own array assignment. The store sequence is then
            pop temp 0 / pop pointer 1 / push temp 0 / pop that 0
        with no call between the temp write and the temp read.

        :param token: the 'let' keyword
        :param indentation: nesting depth used for the XML log
        """
        self.log_node('letStatement', indentation)
        # let
        self.compile_token(token, indentation + 1, 'let')
        # target variable name
        token = self.advance()
        self.compile_token(token, indentation + 1, [IDENTIFIER])
        var_name = token.content
        # = or [
        token = self.advance()
        array = token.content == '['
        if array:
            self.compile_token(token, indentation + 1, '[')
            token = self.advance()
            # e.g. x[y]: push y to the stack
            self.compile_expression(token, indentation + 1)
            token = self.advance()
            self.compile_token(token, indentation + 1, ']')
            token = self.advance()
            # push x to the stack
            self.write_push(var_name)
            # x + y = address of the element; it stays on the stack
            self.vm_writer.write_arithmetic('ADD')
        self.compile_token(token, indentation + 1, '=')
        # expression
        token = self.advance()
        self.compile_expression(token, indentation + 1)
        if array:
            # stack: ..., address, value
            self.vm_writer.write_pop('TEMP', 0)
            self.vm_writer.write_pop('POINTER', 1)
            self.vm_writer.write_push('TEMP', 0)
            self.vm_writer.write_pop('THAT', 0)
        else:
            kind = self.symbol_table.kind_of(var_name)
            segments = ((VAR, 'LOCAL'), (ARG, 'ARG'), (FIELD, 'THIS'),
                        (STATIC, 'STATIC'))
            for known_kind, segment in segments:
                if kind == known_kind:
                    self.vm_writer.write_pop(
                        segment, self.symbol_table.index_of(var_name),
                        var_name)
                    break
        # ;
        token = self.advance()
        self.compile_token(token, indentation + 1, ';')
        self.log_node('/letStatement', indentation)
        return

    def write_push(self, var_name):
        """
        Push the value of a named variable onto the VM stack.

        The variable's kind selects the segment: VAR -> LOCAL, ARG -> ARG,
        FIELD -> THIS, STATIC -> STATIC. STATIC was previously missing, so
        a static variable produced no push at all. A name of any other
        kind still emits nothing.
        :param var_name: name to look up in the symbol table
        """
        kind = self.symbol_table.kind_of(var_name)
        segments = ((VAR, 'LOCAL'), (ARG, 'ARG'), (FIELD, 'THIS'),
                    (STATIC, 'STATIC'))
        for known_kind, segment in segments:
            if kind == known_kind:
                self.vm_writer.write_push(
                    segment, self.symbol_table.index_of(var_name), var_name)
                return

    def compile_while(self, token: Token, indentation):
        """
        Compile a while statement.

        Emitted VM layout (N is the per-engine while counter):
            label WHILE_N_EXP
            <condition>; NOT; if-goto WHILE_N_END
            <body statements>
            goto WHILE_N_EXP
            label WHILE_N_END
        """
        prefix = 'WHILE_%s' % self.while_label_index
        top_label = '%s_EXP' % prefix
        exit_label = '%s_END' % prefix
        # label index++
        self.while_label_index += 1
        self.vm_writer.write_label(top_label)
        self.log_node('whileStatement', indentation)
        inner = indentation + 1
        self.compile_token(token, inner, 'while')
        # (
        self.compile_token(self.advance(), inner, '(')
        condition = self.advance()
        self.vm_writer.write_comment("calculating while condition expression")
        # expression
        self.compile_expression(condition, inner)
        # )
        self.compile_token(self.advance(), inner, ')')
        # leave the loop when the condition is false
        self.vm_writer.write_arithmetic('NOT')
        self.vm_writer.write_if(exit_label)
        # {
        self.compile_token(self.advance(), inner, '{')
        # statements
        body = self.advance()
        if body.content != '}':
            # not empty statement
            self.compile_statements(body, inner)
            # }
            body = self.advance()
        self.compile_token(body, inner, '}')
        self.vm_writer.write_goto(top_label)
        self.vm_writer.write_label(exit_label)
        self.log_node('/whileStatement', indentation)

    def compile_return(self, token: Token, indentation):
        """
        Compile a return statement: 'return' expression? ';'.

        With an expression, it is compiled and a plain return is written.
        Without one, ``write_return(True)`` is used instead.
        """
        self.log_node('returnStatement', indentation)
        inner = indentation + 1
        self.compile_token(token, inner, 'return')
        token = self.advance()
        if token.content == ';':
            # for functions that return void, it must return an integer 0
            self.vm_writer.write_return(True)
        else:
            self.compile_expression(token, inner)
            token = self.advance()
            self.vm_writer.write_return()
        self.compile_token(token, inner, ';')
        self.log_node('/returnStatement', indentation)

    def compile_if(self, token: Token, indentation):
        """
        Compile an if statement with an optional else clause.

        Emitted VM layout (N is the per-engine if counter):
            <condition>; NOT; if-goto ELSE_N
            <if statements>
            goto FINISH_N        (only with an else clause)
            label ELSE_N
            <else statements>    (only with an else clause)
            label FINISH_N       (only with an else clause)

        An empty else body ("else {}") is handled the same way as an empty
        if body: the '}' just read is used as the closing brace instead of
        reading one token too many.

        :param token: the 'if' keyword
        :param indentation: nesting depth used for the XML log
        """
        else_label = 'ELSE_%s' % self.if_label_index
        finish_label = 'FINISH_%s' % self.if_label_index
        # label index++ so that later and nested ifs get their own labels
        self.if_label_index += 1

        self.log_node('ifStatement', indentation)
        self.compile_token(token, indentation + 1, 'if')
        token = self.advance()
        self.compile_token(token, indentation + 1, '(')
        self.vm_writer.write_comment("calculating if condition expression")
        token = self.advance()
        # expression
        self.compile_expression(token, indentation + 1)
        # )
        token = self.advance()
        self.compile_token(token, indentation + 1, ')')
        # jump to the else part when the condition is false
        self.vm_writer.write_arithmetic('NOT')
        self.vm_writer.write_if(else_label)
        # {
        token = self.advance()
        self.compile_token(token, indentation + 1, '{')
        # statements
        token = self.advance()
        if token.content != '}':
            # not empty statement
            self.compile_statements(token, indentation + 1)
            # }
            token = self.advance()
        self.compile_token(token, indentation + 1, '}')
        upcoming = self.next()
        if upcoming is not None and upcoming.content == 'else':
            # if statements finished: skip over the else code
            self.vm_writer.write_goto(finish_label)
            self.vm_writer.write_label(else_label)
            token = self.advance()
            self.compile_token(token, indentation + 1, 'else')
            token = self.advance()
            self.compile_token(token, indentation + 1, '{')
            token = self.advance()
            if token.content != '}':
                # not empty statement
                self.compile_statements(token, indentation + 1)
                # }
                token = self.advance()
            self.compile_token(token, indentation + 1, '}')
            self.vm_writer.write_label(finish_label)
        else:
            # no else clause: the else label simply marks the end
            self.vm_writer.write_label(else_label)
        self.log_node('/ifStatement', indentation)
        return

    def compile_expression(self, token, indentation):
        """
        Compile an expression: term (op term)*.

        Operators are applied strictly left to right: each operator's VM
        command is written right after its second operand is compiled.
        """
        self.log_node('expression', indentation)
        inner = indentation + 1
        self.compile_term(token, inner)
        while self.next() is not None and self.next().content in OP_SYMBOLS:
            operator = self.advance()
            self.compile_token(operator, inner, [SYMBOL])
            vm_op = OP_SYMBOLS[operator.content]
            self.compile_term(self.advance(), inner)
            # call op function after pushes the second parameter
            self.vm_writer.write_arithmetic(vm_op)
        self.log_node('/expression', indentation)

    def compile_term(self, token: Token, indentation, do_term=False):
        """
        Compile one term and emit the VM code that leaves its value on the
        stack.

        Handled forms, tested in this order: integer constant, string
        constant, keyword constant (true / false / this / null), array
        element ``name[expr]``, parenthesised expression, unary operator
        applied to a term, call on the current object ``name(args)``,
        qualified call ``name.sub(args)``, and plain variable.

        :param token: first token of the term
        :param indentation: nesting depth used for the XML log
        :param do_term: when True the surrounding <term> log nodes are
            omitted (used by compile_do)
        :raises RuntimeError: if the token starts none of the forms above
        """
        if not do_term:
            self.log_node('term', indentation)
        if token.token_type == INT_CONST:
            self.compile_token(token, indentation + 1, [INT_CONST])
            # todo
            self.vm_writer.write_push('CONST', token.content)
        elif token.token_type == STRING_CONST:
            # Construct the string at run time, e.g. for a 3-char string:
            #   push constant 3
            #   call String.new 1
            #   push constant 72
            #   call String.appendChar 2
            #   ... one push/appendChar pair per character ...
            # The address of the string stays on the stack throughout.
            length = len(token.content)
            self.vm_writer.write_push('CONST', length)
            self.vm_writer.write_call('String.new', 1)
            for c in token.content:
                self.vm_writer.write_push('CONST', ord(c))
                self.vm_writer.write_call('String.appendChar', 2)
            self.compile_token(token, indentation + 1)
        # keyword constants
        elif token.content == 'true':
            # true is written as push 1 followed by NEG
            self.compile_token(token, indentation + 1)
            self.vm_writer.write_push('CONST', 1)
            self.vm_writer.write_arithmetic('NEG')
        elif token.content in ('false', 'null'):
            self.compile_token(token, indentation + 1)
            self.vm_writer.write_push('CONST', 0)
        elif token.content == 'this':
            self.compile_token(token, indentation + 1)
            self.vm_writer.write_push('POINTER', 0)
        elif self.next().content == '[':
            # array element: push base, compile index, add, read via THAT
            self.compile_token(token, indentation + 1, [IDENTIFIER])
            self.write_push(token.content)
            token = self.advance()
            self.compile_token(token, indentation + 1, '[')
            token = self.advance()
            self.compile_expression(token, indentation + 1)
            token = self.advance()
            self.compile_token(token, indentation + 1, ']')
            self.vm_writer.write_arithmetic('ADD')
            self.vm_writer.write_pop('POINTER', 1)
            self.vm_writer.write_push('THAT', 0)
        elif token.content == '(':
            self.compile_token(token, indentation + 1, '(')
            token = self.advance()
            self.compile_expression(token, indentation + 1)
            if token.content != ')':
                token = self.advance()
            self.compile_token(token, indentation + 1, ')')
        elif token.content in UNARY_OP_SYMBOL.keys():
            self.compile_token(token, indentation + 1)
            unary_op = UNARY_OP_SYMBOL[token.content]
            token = self.advance()
            self.compile_term(token, indentation + 1)
            # the operator is applied after its operand has been pushed
            self.vm_writer.write_arithmetic(unary_op)
        elif self.next().content == '(':
            # method call on the current object: `this` is argument 0
            n_arg = 1
            self.vm_writer.write_push('POINTER', 0)
            function_class_name = self.class_name
            function_name = token.content
            self.compile_token(token, indentation + 1, [IDENTIFIER])
            token = self.advance()
            self.compile_token(token, indentation + 1, '(')
            token = self.advance()
            n_arg += self.compile_expression_list(token, indentation + 1)
            self.vm_writer.write_call(
                function_class_name + '.' + function_name, n_arg)
            # with an empty argument list the token just read already is ')'
            if token.content != ')':
                token = self.advance()
            self.compile_token(token, indentation + 1, ')')
        elif self.next().content == '.':
            # qualified call: either Class.function(...) or variable.method(...)
            n_arg = 0
            function_class_name = token.content
            if self.symbol_table.index_of(function_class_name) > -1:
                # the name is a variable: push it as argument 0 and call
                # the subroutine on the variable's declared type instead
                n_arg += 1
                self.vm_writer.write_push(
                    self.symbol_table.kind_of(function_class_name),
                    self.symbol_table.index_of(function_class_name),
                    function_class_name)
                function_class_name = self.symbol_table.type_of(
                    function_class_name)
            self.compile_token(token, indentation + 1, [IDENTIFIER])
            token = self.advance()
            self.compile_token(token, indentation + 1, '.')
            #  function name
            token = self.advance()
            function_name = token.content
            self.compile_token(token, indentation + 1, [IDENTIFIER])
            token = self.advance()
            self.compile_token(token, indentation + 1, '(')
            token = self.advance()
            n_arg += self.compile_expression_list(token, indentation + 1)
            self.vm_writer.write_call(
                function_class_name + '.' + function_name, n_arg)
            # with an empty argument list the token just read already is ')'
            if token.content != ')':
                token = self.advance()
            self.compile_token(token, indentation + 1, ')')
        elif token.token_type == IDENTIFIER:
            # varName
            self.compile_token(token, indentation + 1, [IDENTIFIER])
            # todo: handle the different cases
            kind = self.symbol_table.kind_of(token.content)
            # fields are pushed through THIS, matching write_push and
            # compile_let in this class
            segments = ((VAR, 'LOCAL'), (ARG, 'ARG'), (FIELD, 'THIS'),
                        (STATIC, 'STATIC'))
            for known_kind, segment in segments:
                if kind == known_kind:
                    self.vm_writer.write_push(
                        segment, self.symbol_table.index_of(token.content),
                        token.content)
                    break
        else:
            raise RuntimeError("Uncaught situation", token)
        if not do_term:
            self.log_node('/term', indentation)
        return

    def compile_expression_list(self, token: Token, indentation) -> int:
        """
        Compile a (possibly empty) comma-separated list of expressions.

        The closing ')' is only peeked at, never consumed.
        :param token: first token after '(' (the ')' itself when empty)
        :return: number of expressions compiled
        """
        self.log_node('expressionList', indentation)
        count = 0
        while token.content != ')':
            count += 1
            self.compile_expression(token, indentation + 1)
            if self.next() is not None and self.next().content == ',':
                # multiple expression list
                self.compile_token(self.advance(), indentation + 1, ',')
                token = self.advance()
            elif self.next() is not None and self.next().content == ')':
                break
            else:
                print('UNEXPECTED token in compile_expression_list', token)
                token = self.advance()
        self.log_node('/expressionList', indentation)
        return count
class CompilationEngine():
    """
    compiles a jack source file, read through a jack tokenizer, into VM
    commands that are emitted through a VMWriter bound to output_file
    NOTE: ASSUMES ERROR FREE CODE -> a todo could be to add error handling
    """
    # symbol kind recorded for names declared in each construct
    SYMBOL_KINDS = {'parameter_list': 'argument', 'var_dec': 'local'}
    # token texts that open each construct (read by _starting_token_for)
    STARTING_TOKENS = {
        'var_dec': ['var'],
        'parameter_list': ['('],
        'subroutine_body': ['{'],
        'expression_list': ['('],
        'expression': ['=', '[', '('],
        'array': ['['],
        'conditional': ['if', 'else']
    }
    # token texts that close each construct (read by _not_terminal_token_for)
    TERMINATING_TOKENS = {
        'class': ['}'],
        'class_var_dec': [';'],
        'subroutine': ['}'],
        'parameter_list': [')'],
        'expression_list': [')'],
        'statements': ['}'],
        'do': [';'],
        'let': [';'],
        'while': ['}'],
        'if': ['}'],
        'var_dec': [';'],
        'return': [';'],
        'expression': [';', ')', ']', ','],
        'array': [']']
    }
    # statement keywords handed to LabelCounter so each gets its own count
    TOKENS_THAT_NEED_LABELS = ['if', 'while']

    def __init__(self, tokenizer, output_file):
        """
        bind the engine to its token source and its output target

        tokenizer: token source every compile_* method reads from
        output_file: handed to VMWriter and also kept on the instance
        """
        # filled in by compile_class once the class token is reached
        self.class_name = None
        self.tokenizer, self.output_file = tokenizer, output_file
        # one table for class-level symbols, one for the current subroutine
        self.class_symbol_table, self.subroutine_symbol_table = (
            SymbolTable(), SymbolTable())
        self.vm_writer = VMWriter(output_file)
        self.label_counter = LabelCounter(labels=self.TOKENS_THAT_NEED_LABELS)

    def compile_class(self):
        """
        everything needed to compile a class, the basic unit of compilation
        """
        # skip everything up to class start
        while not self.tokenizer.class_token_reached():
            self.tokenizer.advance()
        # since compilation unit is a class makes sense to store this as instance variable
        self.class_name = self.tokenizer.next_token.text

        while self.tokenizer.has_more_tokens:
            self.tokenizer.advance()

            if self.tokenizer.current_token.starts_class_var_dec():
                self.compile_class_var_dec()
            elif self.tokenizer.current_token.starts_subroutine():
                self.compile_subroutine()

    def compile_class_var_dec(self):
        """
        example: field int x;
        """
        symbol_kind = self.tokenizer.keyword()

        # get symbol type
        self.tokenizer.advance()
        symbol_type = self.tokenizer.keyword()

        # get all identifiers
        while self._not_terminal_token_for('class_var_dec'):
            self.tokenizer.advance()

            if self.tokenizer.identifier():
                # add symbol to class
                symbol_name = self.tokenizer.identifier()
                self.class_symbol_table.define(name=symbol_name,
                                               kind=symbol_kind,
                                               symbol_type=symbol_type)

    def compile_subroutine(self):
        """
        example: methoid void dispose() { ...
        """
        # new subroutine means new subroutine scope
        self.subroutine_symbol_table.reset()

        # get subroutine name
        self.tokenizer.advance()
        self.tokenizer.advance()
        subroutine_name = self.tokenizer.current_token.text

        # compile parameter list
        self.tokenizer.advance()
        self.compile_parameter_list()

        # compile body
        self.tokenizer.advance()
        self.compile_subroutine_body(subroutine_name=subroutine_name)

        # rest counts from subroutine
        self.label_counter.reset_counts()

    def compile_subroutine_body(self, subroutine_name):
        # skip start
        self.tokenizer.advance()
        # get all locals
        num_locals = 0
        while self._starting_token_for('var_dec'):
            num_locals += self.compile_var_dec()
            self.tokenizer.advance()

        # write function command
        self.vm_writer.write_function(name='{}.{}'.format(
            self.class_name, subroutine_name),
                                      num_locals=num_locals)

        # compile all statements
        while self._not_terminal_token_for('subroutine'):
            self.compile_statements()

    def compile_parameter_list(self):
        """
        example: dispose(int a, int b)
        returns number of params found
        """
        ### symbol table
        while self._not_terminal_token_for('parameter_list'):
            self.tokenizer.advance()

            # symbol table
            if self.tokenizer.next_token.is_identifier():
                symbol_kind = self.SYMBOL_KINDS['parameter_list']
                symbol_type = self.tokenizer.current_token.text
                symbol_name = self.tokenizer.next_token.text
                self.subroutine_symbol_table.define(name=symbol_name,
                                                    kind=symbol_kind,
                                                    symbol_type=symbol_type)

    # 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        """
        example: var int a;
        """
        # skip var
        self.tokenizer.advance()
        # get symbol type
        symbol_type = self.tokenizer.current_token.text
        # count number of vars, i.e., var int i, sum = 2
        num_vars = 0

        # get all vars
        while self._not_terminal_token_for('var_dec'):
            self.tokenizer.advance()

            if self.tokenizer.identifier():
                num_vars += 1
                symbol_kind = self.SYMBOL_KINDS['var_dec']
                symbol_name = self.tokenizer.identifier()
                self.subroutine_symbol_table.define(name=symbol_name,
                                                    kind=symbol_kind,
                                                    symbol_type=symbol_type)
        # return vars processed
        return num_vars

    def compile_statements(self):
        """
        call correct statement
        """
        # TODO: way to make this global for class?
        statement_compile_methods = {
            'if': self.compile_if,
            'do': self.compile_do,
            'let': self.compile_let,
            'while': self.compile_while,
            'return': self.compile_return
        }

        while self._not_terminal_token_for('subroutine'):
            if self.tokenizer.current_token.is_statement_token():
                statement_type = self.tokenizer.current_token.text
                statement_compile_methods[statement_type]()

            self.tokenizer.advance()

    def compile_do(self):
        """
        example: do square.dispose();
        """
        # get to caller
        self.tokenizer.advance()
        # set caller_name
        caller_name = self.tokenizer.current_token.text
        # look up in symbol table
        symbol = self._find_symbol_in_symbol_tables(symbol_name=caller_name)
        # skip .
        self.tokenizer.advance()
        # subroutine name
        self.tokenizer.advance()
        # set subroutine name
        subroutine_name = self.tokenizer.current_token.text

        if symbol:  # user defined Method
            # push value onto local segment
            segment = 'local'
            index = symbol['index']
            symbol_type = symbol['type']
            self.vm_writer.write_push(segment=segment, index=index)
        else:  # i.e, OS call
            symbol_type = caller_name

        subroutine_call_name = symbol_type + '.' + subroutine_name
        # start expression list
        self.tokenizer.advance()
        # get arguments in expession list
        num_args = self.compile_expression_list()
        # method call
        if symbol:
            # calling object passed as implicit argument
            num_args += 1
        # write call
        self.vm_writer.write_call(name=subroutine_call_name, num_args=num_args)
        # pop off return of previous call we don't care about
        self.vm_writer.write_pop(segment='temp', index='0')

    # 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        """
        example: let direction = 0;
        """
        # get symbol to store expression evaluation
        self.tokenizer.advance()
        symbol_name = self.tokenizer.current_token.text
        symbol = self._find_symbol_in_symbol_tables(symbol_name=symbol_name)

        # array assignment?
        array_assignment = self._starting_token_for(keyword_token='array',
                                                    position='next')
        if array_assignment:
            # get to index expression
            self.tokenizer.advance()
            self.tokenizer.advance()
            # compile it
            self.compile_expression()
            self.vm_writer.write_push(segment=symbol['kind'],
                                      index=symbol['index'])
            # add two addresses
            self.vm_writer.write_arithmetic(command='+')

        # go past =
        while not self.tokenizer.current_token.text == '=':
            self.tokenizer.advance()
        # compile all expressions
        while self._not_terminal_token_for('let'):
            self.tokenizer.advance()
            self.compile_expression()

        if not array_assignment:
            # store expression evaluation in symbol location
            self.vm_writer.write_pop(segment=symbol['kind'],
                                     index=symbol['index'])
        else:  # array unloading
            # pop return value onto temp
            self.vm_writer.write_pop(segment='temp', index='0')
            # pop address of array slot onto THAT
            self.vm_writer.write_pop(segment='pointer',
                                     index='1')  # pointer 1 => array
            # push value on temp back onto stack
            self.vm_writer.write_push(segment='temp', index='0')
            # set that
            self.vm_writer.write_pop(segment='that', index='0')

    # 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        """
        example: while (x > 0) { ... }
        """
        # write while label
        self.vm_writer.write_label(
            label='WHILE_EXP{}'.format(self.label_counter.get('while')))

        # advance to expression start (
        self.tokenizer.advance()
        self.tokenizer.advance()

        # compile expression in ()
        self.compile_expression()

        # NOT expression so for easily handling of termination and if-goto
        self.vm_writer.write_unary(command='~')
        self.vm_writer.write_ifgoto(
            label='WHILE_END{}'.format(self.label_counter.get('while')))

        while self._not_terminal_token_for('while'):
            self.tokenizer.advance()

            if self._statement_token():
                self.compile_statements()

        # write goto
        self.vm_writer.write_goto(
            label='WHILE_EXP{}'.format(self.label_counter.get('while')))
        # write end label
        self.vm_writer.write_label(
            label='WHILE_END{}'.format(self.label_counter.get('while')))
        # add while to labels count
        self.label_counter.increment('while')

    def compile_if(self):
        """
        example: if (True) { ... } else { ... }
        """
        # advance to expression start
        self.tokenizer.advance()
        self.tokenizer.advance()
        # compile expression in ()
        self.compile_expression()
        # write ifgoto to if statement
        self.vm_writer.write_ifgoto(
            label='IF_TRUE{}'.format(self.label_counter.get('if')))
        # write goto if false (else)
        self.vm_writer.write_goto(
            label='IF_FALSE{}'.format(self.label_counter.get('if')))
        # write if label
        self.vm_writer.write_label(
            label='IF_TRUE{}'.format(self.label_counter.get('if')))
        # body of if
        self.compile_conditional_body()
        # else?
        if self._starting_token_for(keyword_token='conditional',
                                    position='next'):
            # past closing {
            self.tokenizer.advance()
            # goto if end if this path wasn't hit
            self.vm_writer.write_goto(
                label='IF_END{}'.format(self.label_counter.get('if')))
            # if false
            self.vm_writer.write_label(
                label='IF_FALSE{}'.format(self.label_counter.get('if')))
            # compile else
            self.compile_conditional_body()
            # define IF_END
            self.vm_writer.write_label(
                label='IF_END{}'.format(self.label_counter.get('if')))
        else:  # no else present
            # go to end of if
            self.vm_writer.write_label(
                label='IF_FALSE{}'.format(self.label_counter.get('if')))

    def compile_conditional_body(self):
        while self._not_terminal_token_for('if'):
            self.tokenizer.advance()

            if self._statement_token():
                if self.tokenizer.current_token.is_if():
                    # add ifto labels count
                    self.label_counter.increment('if')
                    # compile nested if
                    self.compile_statements()
                    # subtract for exiting nesting
                    self.label_counter.decrement('if')
                else:
                    self.compile_statements()

    # term (op term)*
    def compile_expression(self):
        """
        many examples..i,e., x = 4
        """
        # ops get compiled at end in reverse order in which they were added
        ops = []

        while self._not_terminal_token_for('expression'):
            if self._subroutine_call():
                self.compile_subroutine_call()
            elif self._array_expression():
                self.compile_array_expression()
            elif self.tokenizer.current_token.text.isdigit():
                self.vm_writer.write_push(
                    segment='constant',
                    index=self.tokenizer.current_token.text)
            elif self.tokenizer.identifier():
                self.compile_symbol_push()
            elif self.tokenizer.current_token.is_operator(
            ) and not self._part_of_expression_list():
                ops.insert(
                    0,
                    Operator(token=self.tokenizer.current_token.text,
                             category='bi'))
            elif self.tokenizer.current_token.is_unary_operator():
                ops.insert(
                    0,
                    Operator(token=self.tokenizer.current_token.text,
                             category='unary'))
            elif self.tokenizer.string_const():
                self.compile_string_const()
            elif self.tokenizer.boolean():  # boolean case
                self.compile_boolean()
            elif self._starting_token_for('expression'):  # nested expression
                # skip starting (
                self.tokenizer.advance()
                self.compile_expression()
            elif self.tokenizer.null():
                self.vm_writer.write_push(segment='constant', index=0)

            self.tokenizer.advance()

        # compile_ops
        for op in ops:
            self.compile_op(op)

    def compile_op(self, op):
        """
        example: +, /, etc.
        """
        if op.unary():
            self.vm_writer.write_unary(command=op.token)
        elif op.multiplication():
            self.vm_writer.write_call(name='Math.multiply', num_args=2)
        elif op.division():
            self.vm_writer.write_call(name='Math.divide', num_args=2)
        else:
            self.vm_writer.write_arithmetic(command=op.token)

    def compile_boolean(self):
        """
        'true' and 'false'
        """
        self.vm_writer.write_push(segment='constant', index=0)

        if self.tokenizer.boolean() == 'true':
            # negate true
            self.vm_writer.write_unary(command='~')

    def compile_string_const(self):
        """
        example: "Hello World"
        """
        # handle string const
        string_length = len(self.tokenizer.string_const())
        self.vm_writer.write_push(segment='constant', index=string_length)
        self.vm_writer.write_call(name='String.new', num_args=1)
        # build string from chars
        for char in self.tokenizer.string_const():
            if not char == self.tokenizer.STRING_CONST_DELIMITER:
                ascii_value_of_char = ord(char)
                self.vm_writer.write_push(segment='constant',
                                          index=ascii_value_of_char)
                self.vm_writer.write_call(name='String.appendChar', num_args=2)

    def compile_symbol_push(self):
        """
        example: x
        """
        symbol = self._find_symbol_in_symbol_tables(
            symbol_name=self.tokenizer.identifier())
        segment = symbol['kind']
        index = symbol['index']
        self.vm_writer.write_push(segment=segment, index=index)

    def compile_array_expression(self):
        """
        example: let x = a[j], a[4]
        """
        symbol_name = self.tokenizer.current_token.text
        symbol = self._find_symbol_in_symbol_tables(symbol_name=symbol_name)
        # get to index expression
        self.tokenizer.advance()
        self.tokenizer.advance()
        # compile
        self.compile_expression()
        # push onto local array symbol
        self.vm_writer.write_push(segment='local', index=symbol['index'])
        # add two addresses: identifer and expression result
        self.vm_writer.write_arithmetic(command='+')
        # pop address onto pointer 1 / THAT
        self.vm_writer.write_pop(segment='pointer', index=1)
        # push value onto stack
        self.vm_writer.write_push(segment='that', index=0)

    def compile_subroutine_call(self):
        """
        example: Memory.peek(8000)
        """
        subroutine_name = ''

        while not self._starting_token_for('expression_list'):
            subroutine_name += self.tokenizer.current_token.text
            self.tokenizer.advance()
        # get num of args
        num_args = self.compile_expression_list()
        # write_call after pushing arguments onto stack
        self.vm_writer.write_call(name=subroutine_name, num_args=num_args)

    # (expression (',' expression)* )?
    def compile_expression_list(self):
        """
        separeted out of compile_expression because of edge cases from normal expression
        example: (x, y, x + 5)
        """
        num_args = 0

        if self._empty_expression_list():
            return num_args

        # start expressions
        self.tokenizer.advance()

        while self._not_terminal_token_for('expression_list'):
            num_args += 1
            self.compile_expression()
            if self._another_expression_coming(
            ):  # would be , after compile expression
                self.tokenizer.advance()
        return num_args

    def compile_return(self):
        """
        example: return x; or return;
        """
        if self._not_terminal_token_for(keyword_token='return',
                                        position='next'):
            self.compile_expression()
        else:  # push constant for void
            self.vm_writer.write_push(segment='constant', index='0')
            self.tokenizer.advance()

        self.vm_writer.write_return()

    def _not_terminal_token_for(self, keyword_token, position='current'):
        if position == 'current':
            return not self.tokenizer.current_token.text in self.TERMINATING_TOKENS[
                keyword_token]
        elif position == 'next':
            return not self.tokenizer.next_token.text in self.TERMINATING_TOKENS[
                keyword_token]

    def _starting_token_for(self, keyword_token, position='current'):
        if position == 'current':
            return self.tokenizer.current_token.text in self.STARTING_TOKENS[
                keyword_token]
        elif position == 'next':
            return self.tokenizer.next_token.text in self.STARTING_TOKENS[
                keyword_token]

    def _statement_token(self):
        return self.tokenizer.current_token.is_statement_token()

    def _another_expression_coming(self):
        return self.tokenizer.current_token.is_expression_list_delimiter()

    def _find_symbol_in_symbol_tables(self, symbol_name):
        if self.subroutine_symbol_table.find_symbol_by_name(symbol_name):
            return self.subroutine_symbol_table.find_symbol_by_name(
                symbol_name)
        elif self.class_symbol_table.find_symbol_by_name(symbol_name):
            return self.class_symbol_table.find_symbol_by_name(symbol_name)

    def _empty_expression_list(self):
        return self._start_of_expression_list(
        ) and self._next_ends_expression_list()

    def _start_of_expression_list(self):
        return self.tokenizer.current_token.text in self.STARTING_TOKENS[
            'expression_list']

    def _next_ends_expression_list(self):
        return self.tokenizer.next_token.text in self.TERMINATING_TOKENS[
            'expression_list']

    def _subroutine_call(self):
        return self.tokenizer.identifier(
        ) and self.tokenizer.next_token.is_subroutine_call_delimiter()

    def _array_expression(self):
        return self.tokenizer.identifier() and self._starting_token_for(
            keyword_token='array', position='next')

    def _part_of_expression_list(self):
        return self.tokenizer.part_of_expression_list()
class CompilationEngine:

    def __init__(self, inputFile, outputFile):
        """Set up the collaborators and compile the class right away.

        inputFile is handed to JackTokenizer, outputFile to VMWriter.
        Constructing the engine runs the whole compilation via CompileClass.
        """
        self.tokenizer = JackTokenizer(inputFile)
        self.vmWriter = VMWriter(outputFile)
        self.symbolTable = SymbolTable()
        self.classname = ""
        # label counters must exist before any compile method can run;
        # assigning them after CompileClass() would only overwrite the
        # state compilation left behind
        self.whilecounter = 0
        self.ifcounter = 0
        # compilation is the last step, once every attribute is in place
        self.CompileClass()

    def CompileClass(self):
        """Compile one class: its name, variable declarations, subroutines."""
        tokens = self.tokenizer
        # the first advance lands on the class name
        tokens.advance()
        self.classname = tokens.identifier()
        # two more steps: past the name, then past the opening {
        for _ in range(2):
            tokens.advance()

        # class-level variable declarations come first
        while tokens.keyWord() in ("static", "field"):
            self.CompileClassVarDec()

        # then every subroutine
        while tokens.keyWord() in ("constructor", "function", "method"):
            self.CompileSubroutine()

        # ignore the closing }
        tokens.advance()



    def CompileClassVarDec(self):
        """Register every name of one static/field declaration."""
        tokens = self.tokenizer
        # the declaration keyword is stored as the symbol kind
        kind = tokens.keyWord()
        tokens.advance()
        var_type = self.compileType()

        # first name, then any number of ", name" continuations
        while True:
            self.symbolTable.define(tokens.identifier(), var_type, kind)
            tokens.advance()
            if tokens.symbol() != ",":
                break
            tokens.advance()

        # ignore ;
        tokens.advance()

    def CompileSubroutine(self):
        """Compile one constructor, function or method."""
        tokens = self.tokenizer
        writer = self.vmWriter
        table = self.symbolTable

        # fresh subroutine scope and fresh label numbering
        table.startSubroutine()
        self.ifcounter = self.whilecounter = 0

        # constructor | function | method
        functype = tokens.keyWord()
        tokens.advance()

        # a method receives its object as the first argument
        if functype == "method":
            table.define("this", self.classname, "arg")

        tokens.advance()

        qualified_name = ".".join((self.classname, tokens.identifier()))
        tokens.advance()

        # ( parameterList )
        tokens.advance()
        self.compileParameterList()
        tokens.advance()

        # subroutine body: ignore {, then varDec*
        tokens.advance()
        while tokens.keyWord() == "var":
            self.compileVarDec()

        writer.writeFunction(qualified_name, table.varCount("var"))

        if functype == "constructor":
            # allocate one word per field and make the new block `this`
            writer.writePush("constant", table.varCount("field"))
            writer.writeCall("Memory.alloc", "1")
            writer.writePop("pointer", "0")
        elif functype == "method":
            # argument 0 is the object: make it `this`
            writer.writePush("argument", "0")
            writer.writePop("pointer", "0")

        # statements
        self.compileStatements()

        # ignore }
        tokens.advance()

    def compileParameterList(self):
        """Register each `type varName` parameter as an "arg" symbol."""
        tokens = self.tokenizer
        # token type 1 here means the list is empty (current token is ")")
        if tokens.tokenType() == 1:
            return

        # type varName (, type varName)*
        while True:
            argtype = self.compileType()
            self.symbolTable.define(tokens.identifier(), argtype, "arg")
            tokens.advance()
            if tokens.symbol() != ",":
                break
            tokens.advance()

    def compileVarDec(self):
        """Register every name of one `var` declaration as a "var" symbol."""
        tokens = self.tokenizer
        # step past `var`, then read the type
        tokens.advance()
        var_type = self.compileType()

        # varName (, varName)*
        while True:
            self.symbolTable.define(tokens.identifier(), var_type, "var")
            tokens.advance()
            if tokens.symbol() != ",":
                break
            tokens.advance()

        # ignore ;
        tokens.advance()


    def compileStatements(self):
        """Compile statements for as long as the current token starts one.

        Token type 0 is a keyword. A keyword that does not start a statement
        ends the sequence; without that exit the loop would never advance
        and would spin forever on such a token.
        """
        handler_names = {
            "let": "compileLet",
            "if": "compileIf",
            "while": "compileWhile",
            "do": "compileDo",
            "return": "compileReturn",
        }

        while self.tokenizer.tokenType() == 0:
            handler_name = handler_names.get(self.tokenizer.keyWord())
            if handler_name is None:
                # not a statement keyword: the statement sequence is over
                return
            getattr(self, handler_name)()


    def compileDo(self):
        """Compile `do subroutineCall ;` and drop the returned value."""
        tokens = self.tokenizer
        # step past `do`
        tokens.advance()
        self.compileSubRoutineCall()
        # the call's result is not used: pop it into temp 0
        self.vmWriter.writePop("temp", "0")

        # ignore ;
        tokens.advance()

    def compileLet(self):
        """Compile `let varName ([ expression ])? = expression ;`."""
        # symbol kind -> VM segment; other kinds emit no push/pop at all
        segments = {
            "field": "this",
            "var": "local",
            "arg": "argument",
            "static": "static",
        }
        tokens = self.tokenizer
        writer = self.vmWriter

        # step past `let` onto the variable name
        tokens.advance()
        varname = tokens.identifier()
        varkind = self.symbolTable.kindOf(varname)
        segment = segments.get(varkind)

        tokens.advance()

        if tokens.symbol() != "[":
            # plain variable: ignore =, evaluate, store
            tokens.advance()
            self.CompileExpression()
            if segment is not None:
                writer.writePop(segment, self.symbolTable.indexOf(varname))
            # ignore ;
            tokens.advance()
            return

        # array slot: evaluate the index, then add the base address
        tokens.advance()
        self.CompileExpression()
        if segment is not None:
            writer.writePush(segment, self.symbolTable.indexOf(varname))
        writer.writeArithmetic("add")

        # ignore ] and =
        tokens.advance()
        tokens.advance()
        self.CompileExpression()
        # park the value so the slot address is on top of the stack
        writer.writePop("temp", "0")

        # aim `that` at the slot and store the parked value through it
        writer.writePop("pointer", "1")
        writer.writePush("temp", "0")
        writer.writePop("that", "0")
        # ignore ;
        tokens.advance()


    def compileWhile(self):
        """Compile `while ( expression ) { statements }`."""
        tokens = self.tokenizer
        writer = self.vmWriter

        # step past `while` and the opening (
        tokens.advance()
        tokens.advance()

        # reserve this loop's label number before compiling anything nested
        loop_id = self.whilecounter
        self.whilecounter = loop_id + 1
        top_label = "WHILE_EXP" + str(loop_id)
        exit_label = "WHILE_END" + str(loop_id)

        writer.writeLabel(top_label)
        self.CompileExpression()
        # leave the loop when the condition is false
        writer.writeArithmetic("not")
        writer.writeIf(exit_label)

        # ignore ) and {
        tokens.advance()
        tokens.advance()

        # statements
        self.compileStatements()

        # ignore }
        tokens.advance()
        writer.writeGoto(top_label)
        writer.writeLabel(exit_label)

    def compileReturn(self):
        """Compile `return expression? ;`; a bare return pushes constant 0."""
        # step past `return`
        self.tokenizer.advance()

        if self.isTerm():
            # expression present: its value is what gets returned
            self.CompileExpression()
        else:
            self.vmWriter.writePush("constant", "0")
        self.vmWriter.writeReturn()

        # ignore ;
        self.tokenizer.advance()


    def compileIf(self):
        """Compile `if ( expression ) { statements }` with optional else."""
        tokens = self.tokenizer
        writer = self.vmWriter

        # step past `if` and the opening (
        tokens.advance()
        tokens.advance()
        self.CompileExpression()

        # reserve this statement's label number before the bodies compile
        if_id = self.ifcounter
        self.ifcounter = if_id + 1
        true_label = "IF_TRUE" + str(if_id)
        false_label = "IF_FALSE" + str(if_id)

        writer.writeIf(true_label)
        writer.writeGoto(false_label)
        writer.writeLabel(true_label)
        # ignore )
        tokens.advance()

        # { statements }
        tokens.advance()
        self.compileStatements()
        tokens.advance()

        has_else = tokens.tokenType() == 0 and tokens.keyWord() == "else"
        if not has_else:
            writer.writeLabel(false_label)
            return

        # the if body jumps over the else body
        end_label = "IF_END" + str(if_id)
        writer.writeGoto(end_label)
        writer.writeLabel(false_label)

        # step past `else`
        tokens.advance()

        # { statements }
        tokens.advance()
        self.compileStatements()
        tokens.advance()

        writer.writeLabel(end_label)


    def CompileExpression(self):
        """Compile `term (op term)*`, emitting each operator after its
        right-hand term."""
        # operators that map onto a single VM arithmetic command; the
        # escaped forms are matched exactly as the tokenizer reports them
        arithmetic = {
            "=": "eq",
            "+": "add",
            "-": "sub",
            "&amp;": "and",
            "|": "or",
            "&lt;": "lt",
            "&gt;": "gt",
        }
        # operators that are emitted as two-argument Math calls
        library_calls = {"*": "Math.multiply", "/": "Math.divide"}

        tokens = self.tokenizer
        writer = self.vmWriter

        # term
        self.CompileTerm()
        # (op term)*
        op = tokens.symbol()
        while tokens.tokenType() == 1 and op in operators:
            tokens.advance()
            self.CompileTerm()
            if op in arithmetic:
                writer.writeArithmetic(arithmetic[op])
            elif op in library_calls:
                writer.writeCall(library_calls[op], "2")
            op = tokens.symbol()

    def CompileTerm(self):
        """Compile one term and emit the VM code that pushes its value.

        The branch is chosen from the tokenizer's numeric token type as this
        method uses it: 3 = integer constant, 4 = string constant,
        0 = keyword constant, 2 = identifier (array entry, subroutine call or
        plain variable), 1 with ``(`` = parenthesised expression. Anything
        else is treated as a unary operator followed by a term.
        """
        tokenizer = self.tokenizer
        writer = self.vmWriter
        # symbol-table kind -> VM memory segment
        segment_for = {
            "field": "this",
            "var": "local",
            "arg": "argument",
            "static": "static",
        }
        token_type = tokenizer.tokenType()

        if token_type == 3:
            # integer constant
            writer.writePush("constant", tokenizer.intVal())
            tokenizer.advance()
            return

        if token_type == 4:
            # string constant: build it at run time, one character at a time
            text = tokenizer.stringVal()
            writer.writePush("constant", str(len(text)))
            writer.writeCall("String.new", "1")
            for char in text:
                writer.writePush("constant", str(ord(char)))
                writer.writeCall("String.appendChar", "2")
            tokenizer.advance()
            return

        if token_type == 0:
            # keyword constant
            word = tokenizer.keyWord()
            if word == "true":
                # true is emitted as not(0)
                writer.writePush("constant", "0")
                writer.writeArithmetic("not")
            elif word in ("false", "null"):
                writer.writePush("constant", "0")
            elif word == "this":
                writer.writePush("pointer", "0")
            tokenizer.advance()
            return

        if token_type == 2:
            # one token of lookahead decides what the identifier starts
            following = tokenizer.tokens[tokenizer.currentToken + 1]
            if following == '[':
                # varName [ expression ]
                name = tokenizer.identifier()
                segment = segment_for.get(self.symbolTable.kindOf(name))
                tokenizer.advance()  # varName
                tokenizer.advance()  # [
                self.CompileExpression()
                if segment is not None:
                    writer.writePush(segment, self.symbolTable.indexOf(name))
                # base + index, then read the entry through THAT
                writer.writeArithmetic("add")
                writer.writePop("pointer", "1")
                writer.writePush("that", "0")
                tokenizer.advance()  # ]
            elif following in ('(', '.'):
                # subroutine call
                self.compileSubRoutineCall()
            else:
                # plain variable
                name = tokenizer.identifier()
                segment = segment_for.get(self.symbolTable.kindOf(name))
                if segment is not None:
                    writer.writePush(segment, self.symbolTable.indexOf(name))
                tokenizer.advance()
            return

        if token_type == 1 and tokenizer.symbol() == '(':
            # ( expression )
            tokenizer.advance()
            self.CompileExpression()
            tokenizer.advance()
            return

        # unary operator followed by a term
        unary = tokenizer.symbol()
        tokenizer.advance()
        self.CompileTerm()
        if unary == "-":
            writer.writeArithmetic("neg")
        elif unary == "~":
            writer.writeArithmetic("not")

    def compileSubRoutineCall(self):
        """Compile a subroutine call and emit its ``call`` command.

        Two shapes are handled:

        * ``name(args)`` -- compiled as a method call on the current object:
          ``pointer 0`` is pushed first and counted as an extra argument.
        * ``prefix.name(args)`` -- when ``prefix`` is found in the symbol
          table's subroutine or class table it is treated as an object
          variable: the variable is pushed as the first argument and the
          callee is ``<type of variable>.name``. Otherwise ``prefix`` is
          used verbatim as the class name and no extra argument is added.
        """
        tokenizer = self.tokenizer
        writer = self.vmWriter

        # subroutineName | (className | varName)
        target = tokenizer.identifier()
        tokenizer.advance()

        if tokenizer.symbol() == '(':
            # bare name: method of the current object
            tokenizer.advance()  # (
            writer.writePush("pointer", "0")
            count = self.CompileExpressionList()
            writer.writeCall(self.classname + "." + target, str(count + 1))
            tokenizer.advance()  # )
            return

        # '.' subroutineName '('
        tokenizer.advance()  # .
        method = tokenizer.identifier()
        tokenizer.advance()  # subroutineName
        tokenizer.advance()  # (

        table = self.symbolTable
        in_subroutine = target in table.subroutinetable
        if in_subroutine or target in table.classtable:
            # call on an object held in a variable
            kind = table.kindOf(target)
            if in_subroutine:
                segment = "local" if kind == "var" else "argument"
            else:
                segment = "static" if kind == "static" else "this"
            writer.writePush(segment, table.indexOf(target))

            count = self.CompileExpressionList()
            writer.writeCall(table.typeOf(target) + "." + method, str(count + 1))
        else:
            # prefix is taken to be a class name
            count = self.CompileExpressionList()
            writer.writeCall(target + "." + method, str(count))
        tokenizer.advance()  # )

    def CompileExpressionList(self):
        """Compile a possibly empty, comma-separated list of expressions.

        Returns:
            The number of expressions compiled (0 when the current token
            cannot start a term).
        """
        if not self.isTerm():
            return 0

        # first expression
        self.CompileExpression()
        count = 1
        # (',' expression)*
        while self.tokenizer.symbol() == ',':
            self.tokenizer.advance()
            self.CompileExpression()
            count += 1
        return count

    def isTerm(self):
        """Return True when the current token can begin a term.

        Accepted starts, by the tokenizer's numeric token type: identifiers
        (2), integer constants (3), string constants (4), keywords (0) listed
        in ``keyword_const``, and the symbols (1) ``(``, ``-`` and ``~``.
        """
        token_type = self.tokenizer.tokenType()
        if token_type in (2, 3, 4):
            return True
        if token_type == 0:
            return self.tokenizer.keyWord() in keyword_const
        if token_type == 1:
            return self.tokenizer.symbol() in ('(', '-', '~')
        return False

    def compileType(self):
        """Read the current token as a type name, consume it and return it.

        A keyword token (token type 0) is read with ``keyWord()``; any other
        token is read with ``identifier()``.
        """
        tokenizer = self.tokenizer
        type_name = (
            tokenizer.keyWord()
            if tokenizer.tokenType() == 0
            else tokenizer.identifier()
        )
        tokenizer.advance()
        return type_name
Example #34
0
from SymbolTable import SymbolTable

# Smoke test for SymbolTable. Failures are reported by printing a message;
# nothing is raised, so the script always runs to completion.
# Single-argument print(...) calls are used so the script runs unchanged on
# both Python 2 and Python 3.
tab = SymbolTable()

tab.addEntry("testSymbol", 32767)

print("=====Testing SymbolTable")

if not tab.contains("testSymbol"):
    print("SymbolTable.contains should return true")

# 32767 is expected back as a 15-digit binary string.
if tab.getAddress("testSymbol") != "111111111111111":
    print("SymbolTable.getAddress did not return correct value")

tab.addVariable("testVar")

# The first variable added is expected at address 16 (binary 10000).
if tab.getAddress("testVar") != "000000000010000":
    print("SymbolTable.getAddress did not return correct value for var")

print("Done testing SymbolTable=====")
Example #35
0
class STManager():
    """Manage a stack of symbol tables plus a table of known procedures.

    ``root`` is the outermost symbol table. ``activeSTs`` is a stack whose
    last element is the table that lookups and insertions currently target
    (see ``currActive``). ``ftable`` maps a procedure's line number to the
    data recorded for it by ``enterProc``.
    """

    def __init__(self):
        """Create the root symbol table and make it the only active table."""
        # create the root symbol table
        self.root = ST()
        # initialize the stack of symbol tables for activation records
        self.activeSTs = [self.root]
        # table of functions, keyed by line number
        self.ftable = {}

    def makeTable(self, _prev):
        """Create a new symbol table whose parent is ``_prev``.

        The new table is only returned; it is not pushed onto the stack.

        @params _prev {SymbolTable} -- parent symbol table
        @return {SymbolTable} -- newly created symbol table
        """
        newST = ST()
        newST.parent = _prev
        return newST

    def lookup(self, _symbol):
        """Search for a symbol starting at the current active table.

        @params _symbol {string} -- symbol to look up
        @return -- whatever the active table's ``search`` returns
        """
        return self.currActive.search(_symbol)

    def lookupInRoot(self, _symbol):
        """Search for a symbol in the root table (globals and functions).

        @params _symbol {string} -- symbol to look up
        @return -- whatever the root table's ``search`` returns
        """
        return self.root.search(_symbol)

    def push(self, _st):
        """Push a symbol table onto the stack, making it the active table.

        @params _st {SymbolTable} -- symbol table to push
        """
        self.activeSTs.append(_st)

    def pop(self):
        """Remove and return the table on top of the stack.

        @return {SymbolTable} -- removed symbol table
        """
        return self.activeSTs.pop()

    def insert(self, _name, _type, _width, _scope=None):
        """Create a new entry in the current active symbol table.

        @params _name {string} -- name (key | id) of the new entry
        @params _type {string} -- type of the name (an attribute)
        @params _width {integer} -- size | offset for the name
        @params _scope -- passed through unchanged to the table's ``insert``
        @return -- whatever the active table's ``insert`` returns
        """
        return self.currActive.insert(_name, _type, _width, _scope)

    def linkGlobalSym(self, _name, _attrs):
        """Add a link to a global variable into the current active table.

        @params _name {string} -- name of the global symbol
        @params _attrs -- attributes handed to the table's ``linkGlobalSym``
        """
        self.currActive.linkGlobalSym(_name, _attrs)

    def enterProc(self, _name, _lineNumber, _numParams, _procST):
        """Record a procedure and enter it into the current active table.

        The procedure is stored in ``ftable`` under ``_lineNumber`` and then
        forwarded to the active table's ``enterProc``.

        @params _name {string} -- name of the procedure
        @params _lineNumber {int} -- line number where the procedure is defined
        @params _numParams {int} -- number of parameters of the procedure
        @params _procST {SymbolTable} -- symbol table for the procedure
        @return -- whatever the active table's ``enterProc`` returns
        """
        self.ftable[_lineNumber] = {
            "place": _name,
            "numParams": _numParams,
            "st": _procST
        }
        return self.currActive.enterProc(_name, _lineNumber, _numParams, _procST)

    def setAttr(self, _symbol, _key, _val):
        """Set an attribute of a symbol via the current active table.

        @params _symbol {string} -- symbol whose attribute is set
        @params _key {string} -- key of the attribute
        @params _val {object} -- value of the attribute
        """
        self.currActive.setAttr(_symbol, _key, _val)

    def getAttr(self, _symbol, _key):
        """Get one attribute of a symbol via the current active table.

        @params _symbol {string} -- symbol whose attribute is fetched
        @params _key {string} -- attribute key
        @return {object} -- whatever the active table's ``getAttr`` returns
        """
        return self.currActive.getAttr(_symbol, _key)

    def getAttrs(self, _symbol):
        """Get all attributes of a symbol via the current active table.

        @params _symbol {string} -- symbol whose attributes are fetched
        @return {object} -- whatever the active table's ``getAttrs`` returns
        """
        return self.currActive.getAttrs(_symbol)

    @property
    def currActive(self):
        """The current active symbol table (top of the stack)."""
        return self.activeSTs[-1]

    def insertKeywords(self):
        """Placeholder for inserting keywords into the root symbol table.

        Not implemented: currently does nothing and returns None.
        """
        return None
Example #36
0
class Compiler:
    """Compile one Jack-style class from a token list into VM code text.

    The compiler walks ``self.tokens`` (a list of dicts read through their
    ``"token"`` and ``"Type"`` keys) using the cursor ``self.count`` and
    appends every generated VM command to the string ``self.vmcode``.
    """

    def __init__(self, path):
        """Read and tokenize the source file at *path* and reset all state."""
        with open(path) as f:
            text = f.read()
        self.tokens = tokenize(text)
        self.count = 0  # index of the current token in self.tokens
        self.ClassScope = SymbolTable()
        self.SubroutineScope = SymbolTable()
        # next unused label index per statement kind; reset per subroutine
        self.label_count = {"if": 0, "while": 0}
        self.vmcode = ""

    def writeVMcode(self, path):
        """Compile the class and write the generated VM code to *path*."""
        self.compileClass()
        with open(path, "w") as f:
            f.write(self.vmcode)

    def _peek(self, offset=0):
        """Return the text of the token *offset* positions past the cursor."""
        return self.tokens[self.count + offset]["token"]

    def takeWord(self):
        """Return the current token's text and move past it."""
        token = self._peek()
        self.count += 1
        return token

    def advance(self):
        """Skip the current token."""
        self.count += 1

    def compileClass(self):
        """Compile ``class Name { classVarDec* subroutineDec* }``."""
        self.advance()  # class
        self.className = self.takeWord()  # className
        self.advance()  # {
        while self._peek() in ("static", "field"):
            self.compileClassVarDec()
        while self._peek() in ("constructor", "method", "function"):
            self.compileSubroutine()
        self.advance()  # }

    def compileClassVarDec(self):
        """Define every variable of one ``static``/``field`` declaration."""
        symbolKind = self.takeWord()  # (static|field)
        symbolType = self.takeWord()  # type
        symbolName = self.takeWord()  # VarName
        self.ClassScope.define(symbolName, symbolType, symbolKind)
        while self._peek() != ";":
            self.advance()  # ,
            symbolName = self.takeWord()  # VarName
            self.ClassScope.define(symbolName, symbolType, symbolKind)
        self.advance()  # ;

    def compileSubroutine(self):
        """Compile one constructor, method or function declaration.

        Emits the ``function`` command (with the local-variable count), the
        object allocation prologue for constructors, and then the body.
        Label counters are reset afterwards so every subroutine numbers its
        labels from zero.
        """
        # NOTE(review): nothing is emitted here to bind THIS for "method"
        # subroutines, and no implicit first argument is defined for them --
        # confirm whether that is handled elsewhere.
        self.SubroutineScope.reset()
        ftype = self.takeWord()  # (constructor|method|function)
        self.advance()  # type
        subroutineName = self.takeWord()  # SubroutineName
        self.advance()  # (
        self.complieParameterList()
        self.advance()  # )
        self.advance()  # {
        num_locals = 0
        while self._peek() == "var":
            num_locals += self.compileVarDec()
        self.vmcode += VMWriter.writeFunction(
            self.className + "." + subroutineName, num_locals)
        # a constructor must allocate memory for the object it produces
        if ftype == "constructor":
            num_field = self.ClassScope.varCount("field")
            self.vmcode += VMWriter.writePush("constant", num_field)
            self.vmcode += VMWriter.writeCall("Memory.alloc", 1)
            # link the new block to THIS
            self.vmcode += VMWriter.writePop("pointer", 0)
            # change all FIELD in ClassScope to THIS
            self.ClassScope.field2This()
        self.compileStatements()
        self.advance()  # }
        self.label_count["if"] = 0
        self.label_count["while"] = 0

    def complieParameterList(self):
        """Define each parameter as an ``argument`` in the subroutine scope."""
        if self._peek() == ")":  # has no parameters
            return
        symbolKind = "argument"
        symbolType = self.takeWord()  # type
        symbolName = self.takeWord()  # VarName
        self.SubroutineScope.define(symbolName, symbolType, symbolKind)
        while self._peek() == ",":
            self.advance()  # ,
            symbolType = self.takeWord()  # type
            symbolName = self.takeWord()  # VarName
            self.SubroutineScope.define(symbolName, symbolType, symbolKind)

    def compileVarDec(self):
        """Define the variables of one ``var`` declaration as ``local``.

        Returns:
            The number of variables declared.
        """
        symbolKind = "local"
        self.advance()  # var
        symbolType = self.takeWord()  # type
        symbolName = self.takeWord()  # VarName
        self.SubroutineScope.define(symbolName, symbolType, symbolKind)
        num_locals = 1
        while self._peek() == ",":
            self.advance()  # ,
            symbolName = self.takeWord()  # VarName
            self.SubroutineScope.define(symbolName, symbolType, symbolKind)
            num_locals += 1
        self.advance()  # ;
        return num_locals

    def compileStatements(self):
        """Compile statements until a token that does not start one.

        An empty statement list (cursor already on ``}``) compiles nothing.
        """
        handlers = {
            "do": self.compileDo,
            "let": self.compileLet,
            "while": self.compileWhile,
            "return": self.compileReturn,
            "if": self.compileIf,
        }
        while self._peek() in handlers:
            handlers[self._peek()]()

    def compileDo(self):
        """Compile ``do subroutineCall ;`` and discard the returned value."""
        self.advance()  # do
        self.compileSubroutineCall()
        # the result is not stored anywhere, so pop it off the stack
        self.vmcode += VMWriter.writePop('temp', '0')
        self.advance()  # ;

    def compileLet(self):
        """Compile ``let varName ([expr])? = expr ;``."""
        self.advance()  # let
        symbolName = self.takeWord()  # varName
        symbol = self.findSymbol(symbolName)
        isArray = self._peek() == "["
        if isArray:
            self.advance()  # [
            # push the index expression
            self.compileExpression()
            # push base address
            self.vmcode += VMWriter.writePush(symbol['kind'], symbol['index'])
            # add two addresses
            self.vmcode += VMWriter.writeArithmetic('+')
            self.advance()  # ]
        self.advance()  # =
        # push the right-hand side
        self.compileExpression()
        if not isArray:
            self.vmcode += VMWriter.writePop(symbol['kind'], symbol['index'])
        else:
            # park the result so the target address can be moved into THAT
            self.vmcode += VMWriter.writePop('temp', '0')
            self.vmcode += VMWriter.writePop('pointer', '1')
            self.vmcode += VMWriter.writePush('temp', '0')
            self.vmcode += VMWriter.writePop('that', '0')
        self.advance()  # ;

    def compileWhile(self):
        """Compile ``while (expr) { statements }``.

        The label index is reserved before the body is compiled so that a
        nested ``while`` gets its own, distinct labels.
        """
        index = self.label_count["while"]
        self.label_count["while"] += 1
        start_label = "WHILE_START_" + str(index)
        end_label = "WHILE_END_" + str(index)

        self.vmcode += VMWriter.writeLabel(start_label)
        self.advance()  # while
        self.advance()  # (
        self.compileExpression()
        self.advance()  # )
        # if not expression: goto whileEnd
        self.vmcode += VMWriter.writeArithmetic("~")
        self.vmcode += VMWriter.writeIfGoto(end_label)
        self.advance()  # {
        self.compileStatements()
        self.vmcode += VMWriter.writeGoto(start_label)
        self.advance()  # }
        self.vmcode += VMWriter.writeLabel(end_label)

    def compileReturn(self):
        """Compile ``return expr? ;``; a bare return pushes constant 0."""
        self.advance()  # return
        if self._peek() != ";":
            # push result to stack
            self.compileExpression()
        else:
            # push constant 0 to stack for void
            self.vmcode += VMWriter.writePush("constant", "0")
        self.advance()  # ;
        self.vmcode += VMWriter.writeReturn()

    def compileIf(self):
        """Compile ``if (expr) { statements } (else { statements })?``.

        The label index is reserved up front, so sibling and nested ``if``
        statements within one subroutine never share labels.
        """
        index = self.label_count["if"]
        self.label_count["if"] += 1
        true_label = "IF_TRUE_" + str(index)
        false_label = "IF_FALSE_" + str(index)
        end_label = "IF_END_" + str(index)

        self.advance()  # if
        self.advance()  # (
        self.compileExpression()
        self.advance()  # )
        # if expression: goto IF_TRUE, otherwise goto IF_FALSE
        self.vmcode += VMWriter.writeIfGoto(true_label)
        self.vmcode += VMWriter.writeGoto(false_label)
        self.vmcode += VMWriter.writeLabel(true_label)
        self.advance()  # {
        self.compileCondStatements()
        self.advance()  # }
        if self._peek() == "else":
            self.advance()  # else
            self.advance()  # {
            # after the if-part, skip over the else-part
            self.vmcode += VMWriter.writeGoto(end_label)
            self.vmcode += VMWriter.writeLabel(false_label)
            self.compileCondStatements()
            self.vmcode += VMWriter.writeLabel(end_label)
            self.advance()  # }
        else:
            self.vmcode += VMWriter.writeLabel(false_label)

    def compileCondStatements(self):
        """Compile the statements of an ``if``/``else`` body.

        Kept as a separate entry point for existing callers. Label indices
        are reserved by compileIf itself, so no counter adjustment is needed.
        """
        self.compileStatements()

    def compileExpression(self):
        """Compile ``term (op term)*``; operators apply front to back."""
        self.compileTerm()
        while self._peek() in ("+", "-", "*", "/", "&", "|", "<", ">", "="):
            op = self.takeWord()  # op
            self.compileTerm()
            if op == "*":
                self.vmcode += VMWriter.writeCall("Math.multiply", 2)
            elif op == "/":
                self.vmcode += VMWriter.writeCall("Math.divide", 2)
            else:
                self.vmcode += VMWriter.writeArithmetic(op)

    def compileTerm(self):
        """Compile one term and emit the code that pushes its value."""
        # unaryOP term
        if self._peek() in ("-", "~"):
            op = self.takeWord()  # op
            self.compileTerm()
            # NOTE(review): unary "-" is passed to writeArithmetic with the
            # same symbol as binary "-"; confirm the writer can tell negate
            # from subtract.
            self.vmcode += VMWriter.writeArithmetic(op)
        # (exp)
        elif self._peek() == "(":
            self.advance()  # (
            self.compileExpression()
            self.advance()  # )
        # Subroutine Call
        elif self._peek(1) in ("(", "."):
            self.compileSubroutineCall()
        # Array element
        elif self._peek(1) == "[":
            self.compileArrayEXP()
        # intConst
        elif self.tokens[self.count]["Type"] == "INT_CONST":
            i = self.takeWord()
            self.vmcode += VMWriter.writePush("constant", i)
        # StringConst
        elif self.tokens[self.count]["Type"] == "STRING_CONST":
            s = self.takeWord()
            self.compileString(s)
        # KeyConst
        elif self.tokens[self.count]["Type"] == "KEYWORDS":
            word = self.takeWord()
            if word == "null":
                self.vmcode += VMWriter.writePush("constant", 0)
            elif word == "true":
                self.vmcode += VMWriter.writePush("constant", 0)
                self.vmcode += VMWriter.writeArithmetic("~")
            elif word == "false":
                self.vmcode += VMWriter.writePush("constant", 0)
            elif word == "this":
                self.vmcode += VMWriter.writePush("pointer", 0)
        # varName
        elif self.tokens[self.count]["Type"] == "IDENTIFIER":
            var = self.takeWord()
            symbol = self.findSymbol(var)
            self.vmcode += VMWriter.writePush(symbol['kind'], symbol['index'])

    def compileString(self, s):
        """Emit code that builds the string *s* with the String library.

        Double-quote characters in *s* are skipped rather than appended.
        """
        str_len = len(s)
        self.vmcode += VMWriter.writePush("constant", str_len)
        self.vmcode += VMWriter.writeCall("String.new", 1)
        for c in s:
            if c != "\"":
                self.vmcode += VMWriter.writePush("constant", ord(c))
                self.vmcode += VMWriter.writeCall("String.appendChar", 2)

    def compileSubroutineCall(self):
        """Compile ``name(args)`` or ``(className|varName).name(args)``.

        * ``varName.name(...)`` -- when the prefix is a known symbol, the
          object is pushed as the first argument and the callee is
          ``<symbol type>.name``.
        * ``className.name(...)`` -- the prefix is used as the class name.
        * ``name(...)`` -- the callee is qualified with the current class.
        """
        first = self.takeWord()  # subroutineName | className | varName
        receiver = None
        if self._peek() == ".":
            self.advance()  # .
            func = self.takeWord()  # subroutineName
            receiver = self.findSymbol(first)
            owner = receiver['type'] if receiver else first
        else:
            # NOTE(review): a bare call does not pass the current object;
            # confirm whether `pointer 0` should be pushed here.
            func = first
            owner = self.className

        if receiver:
            # the object the method is invoked on is argument 0
            self.vmcode += VMWriter.writePush(receiver['kind'],
                                              receiver['index'])
        self.advance()  # (
        num_args = self.compileExpressionList()
        if receiver:
            num_args += 1
        self.vmcode += VMWriter.writeCall(owner + "." + func, num_args)
        self.advance()  # )

    def compileArrayEXP(self):
        """Compile ``varName[expr]`` as a value read through THAT."""
        symbolName = self.takeWord()  # varName
        symbol = self.findSymbol(symbolName)
        self.advance()  # [
        self.compileExpression()
        # push base address from the segment the array variable lives in
        self.vmcode += VMWriter.writePush(symbol['kind'], symbol['index'])
        # add index (expression result) and base address
        self.vmcode += VMWriter.writeArithmetic('+')
        # pop address into THAT
        self.vmcode += VMWriter.writePop("pointer", "1")
        # push value to stack
        self.vmcode += VMWriter.writePush("that", "0")
        self.advance()  # ]

    def compileExpressionList(self):
        """Compile a comma-separated expression list up to ``)``.

        Returns:
            The number of expressions compiled.
        """
        if self._peek() == ")":
            return 0
        self.compileExpression()
        num_args = 1
        while self._peek() == ",":
            self.advance()  # ,
            self.compileExpression()
            num_args += 1
        return num_args

    def findSymbol(self, symbol):
        """Look up *symbol* in the subroutine scope, then the class scope.

        Returns:
            The first matching symbol entry, or None when neither scope
            defines the name.
        """
        for scope in (self.SubroutineScope, self.ClassScope):
            for entry in scope.symbols:
                if entry["name"] == symbol:
                    return entry
        return None
Example #37
0
 def visit_WhileInstr(self,node):
     """Visit a while instruction: condition first, then the body in a new scope.

     The condition is visited with the current symbol table. The body is
     visited with a fresh SymbolTable created from the current one and the
     name 'while'; afterwards the table returned by getParentScope() is
     restored.
     """
     # print "visiting While"
     node.condition.accept(self)
     # enter the loop-body scope
     self.symbolTable = SymbolTable(self.symbolTable,'while')
     node.instruction.accept(self)
     # leave the loop-body scope
     self.symbolTable = self.symbolTable.getParentScope()
Example #38
0
class CompilationEngine:
    """Compile a class from a token stream, writing VM code to a file.

    Wraps a tokenizer and an output file, and owns the VMWriter and
    SymbolTable used while compiling.
    """

    # Class-level defaults; the per-instance values are set in __init__.
    _MyTokenizer = None
    _MyOutputFile = None
    _MyClassToken = None
    MyVMWriter = None
    CurrentSubroutine = None
    CurrentClass = None
    MySymbolTable = None
    LabelNum = 0
    numExpressions = 0

    _classTypes = ["int", "char", "boolean"]

    _classVarDecOpenings = ["static", "field"]

    _subroutineDecOpenings = ["constructor", "function", "method"]

    _statementOpenings = ["let", "if", "while", "do", "return"]

    _operators = ["+", "-", "*", "/", "&", "|", "<", ">", "="]
    _unaryOperators = {"-": "neg", "~": "not"}

    _keywordConstants = ["null", "true", "false", "this"]

    # These lists live on the class, so they are shared by every instance.
    subroutineNames = []
    classNames = []
    varNames = []

    def __init__(self, Tokenizer, OutputFile):
        """Store the tokenizer and output file and create fresh helpers.

        Args:
            Tokenizer: token source read through ``hasMoreTokens()``.
            OutputFile: destination handed to the VMWriter.
        """
        self._MyTokenizer = Tokenizer
        self._MyOutputFile = OutputFile
        self.MyVMWriter = VMWriter(self._MyOutputFile)
        self.MySymbolTable = SymbolTable()
        self.CurrentSubroutine = None
        self.CurrentClass = None
        self.LabelNum = 0
        self.numExpressions = 0

    def compileClass(self):
        """Compile ``class Name { classVarDec* subroutineDec* }``.

        When the current token is not the keyword ``class``, a message is
        written to stderr and the method returns without compiling anything.
        """
        classText = "class"

        if self.token().text() != classText:
            # report the problem and give up on this class
            sys.stderr.write("Keyword '" + classText + "' expected\n")
            return

        self._MyClassToken = Token(None)

        # first insert: 'class'
        self.insert(self._MyClassToken, "class", Token.Keyword)
        # class name: start a fresh symbol table and remember the name
        self.MySymbolTable = SymbolTable()
        self.CurrentClass = self.token().text()
        self.classNames.append(self.token().text())

        self.insert(self._MyClassToken, None, Token.Identifier)
        self.insert(self._MyClassToken, "{", Token.Symbol)

        # static / field declarations
        while (self._MyTokenizer.hasMoreTokens()
               and self.token().text() in self._classVarDecOpenings):
            self.compileClassVarDec(self._MyClassToken)

        # constructor / function / method declarations
        while (self._MyTokenizer.hasMoreTokens()
               and self.token().type() == Token.Keyword
               and self.token().text() in self._subroutineDecOpenings):
            self.compileSubroutineDec(self._MyClassToken)

        self.insert(self._MyClassToken, "}", Token.Symbol)
        self.CurrentClass = None
Example #39
0
 def __init__(self):
     """Initialise the parent class, then attach a fresh SymbolTable as ``st``."""
     super().__init__()
     self.st = SymbolTable()
class CompilationEngine:

    def __init__(self, input_stream: str, jack_tokenizer: JackTokenizer):
        """
        creates a new compilation engine with the given
        input and output.

        The output names are derived from ``input_stream`` by dropping a
        trailing ``.jack`` only. Earlier, every ``.jack`` occurrence in the
        path was rewritten (so a directory such as ``src.jack/`` was
        corrupted), and a path without the suffix made the "output" file
        the input file itself, which was then truncated by ``open(.., "wb")``.

        :param input_stream: path of the jack source file
        :param jack_tokenizer: given jack tokenizer
        """
        self.tokenizer = jack_tokenizer
        self.tokens = jack_tokenizer.get_tokens()

        jack_suffix = ".jack"
        if input_stream.endswith(jack_suffix):
            base_name = input_stream[:-len(jack_suffix)]
        else:
            # no recognised suffix: keep the path as-is and append ".xml"
            # below so the source file is never opened for writing
            base_name = input_stream
        self.file_name = base_name
        self.output_file_name = base_name + ".xml"

        # opened here, closed by compile()
        self.output_file = open(self.output_file_name, "wb")
        self.current_class_name = None
        self.root = None
        self.label_counter = 0
        self.tree = None

        # ----- identifier type, project 11, Wednesday -------- #
        # running tally of declared identifiers per kind
        self.identifier_counter = {LOCAL: 0,
                                   ARGUMENT: 0,
                                   STATIC: 0,
                                   FIELD: 0}
        # ----------------------------------------------------- #

        self.symbol_table = SymbolTable()
        # created per class by compile_class()
        self.VMWriter = None

    def compile(self) -> None:
        """
        method to compile jack file and close file afterwards
        :return: none
        """
        self.tokenizer.advance()
        self.compile_class()
        self.output_file.close()

    def compile_class(self) -> None:
        """
        compiles a class.

        Creates the VMWriter for this class, records the class name, then
        compiles class-variable declarations and subroutines until the
        tokenizer reports no more tokens or a token that opens neither
        kind of member is reached.
        :return: None
        """

        # create VMWriter for current class
        self.VMWriter = VMWriter(self.file_name)

        # step from 'class' onto the class name and remember it
        self.tokenizer.advance()
        self.current_class_name = self.tokenizer.get_current_token()[1]

        # step from the name onto '{', then onto the first body token
        self.tokenizer.advance()
        self.tokenizer.advance()

        while self.tokenizer.has_more_tokens():
            token_string = self.tokenizer.get_current_token()[1]
            if CompilationEngine.is_class_field(token_string):
                self.compile_class_var_declaration()
            elif CompilationEngine.is_subroutine(token_string):
                self.compile_subroutine()
            else:
                # Neither branch would consume this token, so iterating
                # again would spin forever on it; stop here instead.
                break

        # step over the closing '}' of the class
        self.tokenizer.advance()

    @staticmethod
    def is_subroutine(token: str) -> bool:
        """
        method to check if token is subroutine
        :param token: string of current token
        :return: true if subroutine declaration, false otherwise
        """
        return ((token == "constructor") or (token == "function") or (
                token == "method"))

    @staticmethod
    def is_var_declare(token: str) -> bool:
        return token == "var"

    @staticmethod
    def is_class_field(token: str) -> bool:
        """
        method to check if token is class field
        :param token: string of current token
        :return: true if class field declaration, false otherwise
        """
        return (token == "static") or (token == "field")

    @staticmethod
    def is_statement(token: str) -> bool:
        """
        tell whether a token opens a statement
        :param token: string of current token
        :return: true when the token equals one of LET, IF, WHILE,
                 DO or RETURN
        """
        return token in (LET, IF, WHILE, DO, RETURN)

    def insert_next_token(self, root) -> None:
        """
        append the current token to ``root`` as a child element and
        move the tokenizer to the next token.

        The element tag is the token type; the element text is the token
        string padded with one space on each side. String constants lose
        their first and last character first.
        :param root: parent element receiving the new child
        :return: none
        """
        token = self.tokenizer.get_current_token()
        kind = token[0]
        text = token[1]

        # string constants: drop the first and last character
        text = text[1:-1] if kind == JackTokenizer.STRING_TYPE else text

        padded = " " + text + " "
        etree.SubElement(root, kind).text = padded
        self.tokenizer.advance()

    def compile_class_var_declaration(self) -> None:
        """
        compiles one class-level declaration:
        (static | field) type name (, name)* ;

        Each declared name is tallied in ``identifier_counter`` and
        defined in the symbol table with the shared type and kind.
        :return: None
        """
        reader = self.tokenizer

        # kind (static | field), then step onto the type
        kind = reader.get_current_token()[1]
        reader.advance()

        # type, then step onto the first name
        type_var = reader.get_current_token()[1]
        reader.advance()

        # anything that is not STATIC is tallied as a FIELD
        tally_key = STATIC if kind == STATIC else FIELD

        # one pass per declared name; names are separated by ','
        while True:
            name = reader.get_current_token()[1]
            reader.advance()

            self.identifier_counter[tally_key] += 1
            self.symbol_table.define(name, type_var, kind)

            if reader.current_word != COMMA:
                break
            # step over the ',' onto the next name
            reader.advance()

        # end of declaration: read the ';' (value not needed), move past it
        terminator = reader.get_current_token()[1]
        reader.advance()

    def compile_subroutine(self) -> None:
        """
        compiles a complete method,
        function or constructor.

        Resets the subroutine scope of the symbol table, defines ``this``
        as an ARGUMENT for methods, walks the header (kind, return type,
        name, parameter list) and hands the body to
        ``compile_subroutine_body`` under the name ``Class.subroutine``.
        :return: None
        """

        # restart as a new subroutine
        self.symbol_table.start_subroutine()

        # constructor | function | method
        subroutine_type = self.tokenizer.get_current_token()[1]

        # for a method, "this" is defined as an ARGUMENT before any declared
        # parameter; the body later reads it back with "push argument 0"
        if subroutine_type == METHOD:
            name = THIS
            var_type = self.current_class_name
            kind = ARGUMENT
            self.symbol_table.define(name, var_type, kind)

        # was function type
        self.tokenizer.advance()
        # now return type (read past, not recorded)

        # was return type
        self.tokenizer.advance()
        # now subroutine name
        subroutine_name = self.tokenizer.get_current_token()[1]

        # qualify the name with the class: Class.subroutine
        subroutine_name = self.current_class_name + DOT + subroutine_name

        # was name
        self.tokenizer.advance()
        # now (

        # parameter list compilation: defines every parameter in the
        # symbol table; the returned count is not used here
        self.compile_parameter_list()

        # was )
        self.tokenizer.advance()
        # now {

        # subroutine body
        self.compile_subroutine_body(subroutine_name, subroutine_type)

        # was }
        self.tokenizer.advance()
        # now token
        return

    def compile_subroutine_body(self, subroutine_name: str,
                                subroutine_type: str):
        """
        method to compile subroutine body:
        { varDec* statements }

        Emits the ``function`` line with the number of declared local
        variables, then the prologue that sets ``pointer 0`` for
        constructors and methods, then the statements.
        :param subroutine_name: qualified name written on the function line
        :param subroutine_type: compared against CONSTRUCTOR and METHOD
        :return: None
        """

        # NOTE(review): despite its name this is the symbol table's FIELD
        # count; it is the size pushed before the allocation call below
        n_locals = self.symbol_table.variable_counter[FIELD]

        # {
        current_token = self.tokenizer.get_current_token()[1]

        # number of local variables declared by the var lines
        var_count = 0

        # was {
        self.tokenizer.advance()
        current_token = self.tokenizer.get_current_token()[1]
        # now subroutine body

        # read all variable declares
        while CompilationEngine.is_var_declare(current_token):
            # each var line reports how many names it declared
            var_count = var_count + self.compile_var_declaration()
            current_token = self.tokenizer.get_current_token()[1]

        # function declare line
        self.VMWriter.write_function(subroutine_name, var_count)

        # putting this
        if subroutine_type == CONSTRUCTOR:
            # allocate memory for object
            # subroutine is constructor

            # push const <field count>
            self.VMWriter.write_push(CONSTANT, n_locals)
            # call the allocation routine with one argument
            self.VMWriter.write_call(ALLOCATION_METHOD, ONE_NUM)
            # (popping this): pop pointer 0
            self.VMWriter.write_pop(POINTER, ZERO_NUM)

        elif subroutine_type == METHOD:
            # push argument 0
            self.VMWriter.write_push(ARGUMENT, ZERO_NUM)
            # pop pointer 0
            self.VMWriter.write_pop(POINTER, ZERO_NUM)

        # subroutine statements
        self.compile_statements()

        # }
        self.tokenizer.advance()

    def compile_var_declaration(self) -> int:
        """
        compiles one local declaration line:
        var type name (, name)* ;

        Every name is defined in the symbol table with the shared type
        and kind.
        :return: how many names the line declared
        """
        reader = self.tokenizer

        # kind keyword (var), then step onto the type
        kind = reader.get_current_token()[1]
        reader.advance()

        # type (int|char|boolean|class), then step onto the first name
        type_var = reader.get_current_token()[1]
        reader.advance()

        declared = 0
        while True:
            # read the name and step onto ',' or ';'
            name = reader.get_current_token()[1]
            reader.advance()

            self.symbol_table.define(name, type_var, kind)
            declared += 1

            if reader.current_word != COMMA:
                break
            # step over the ',' onto the next name
            reader.advance()

        # step over the terminating ';'
        reader.advance()
        return declared

    def compile_parameter_list(self) -> int:
        """
        compiles a (CAN BE EMPTY) parameter list.

        Starts on "(" and stops on the token that ends the list; each
        "type name" pair is defined in the symbol table as an ARGUMENT.
        :return: var count of parameter list
        """
        reader = self.tokenizer

        # step from '(' onto the first type, or onto ')' for an empty list
        reader.advance()
        if reader.get_current_token()[1] == END_OF_PARAM_LIST:
            return 0

        kind = ARGUMENT
        var_count = 0

        while True:
            # type, then step onto the name
            var_type = reader.get_current_token()[1]
            reader.advance()

            # name, then step onto ',' or ')'
            name = reader.get_current_token()[1]
            reader.advance()

            self.symbol_table.define(name, var_type, kind)
            var_count += 1

            if reader.get_current_token()[1] != COMMA:
                break
            # step over the ',' onto the next type
            reader.advance()

        return var_count

    def compile_statements(self) -> None:
        """
        compiles a sequence of statements
        not including the enclosing {}

        The first statement keyword is read from the current token; every
        following one is peeked, because each statement compiler steps
        onto its own keyword itself.
        :return: None
        """
        upcoming = self.tokenizer.get_current_token()[1]

        # nothing to compile when the cursor is already on END_OF_CLASS
        if upcoming == END_OF_CLASS:
            return

        # statement keyword -> method that compiles that statement
        handlers = {
            LET: self.compile_let,
            IF: self.compile_if,
            WHILE: self.compile_while,
            DO: self.compile_do,
            RETURN: self.compile_return,
        }

        while CompilationEngine.is_statement(upcoming):
            handlers[upcoming]()
            # look ahead for another statement keyword
            upcoming = self.tokenizer.peek_at_next_token()[1]

    def compile_do(self) -> None:
        """
        compiles a do statement
        --------------------
        do  subroutineCall ;
        --------------------
        The call is compiled by ``compile_call`` and its result is then
        popped into ``temp 0``.
        :return: None
        """

        # the caller may only have peeked at 'do'; if the cursor is not on
        # it yet, step onto it
        current_token = self.tokenizer.get_current_token()[1]
        if current_token != DO:
            self.tokenizer.advance()
            current_token = self.tokenizer.get_current_token()[1]

        # do
        self.tokenizer.advance()
        # what to do

        # --------------------------------------------- #
        # compilation of subroutine or some class routine
        # --------------------------------------------- #

        # subroutine_name
        # ------- or, for another class method  ---------
        # class_name  -> then .subroutine_name

        rout_or_class_name = self.tokenizer.get_current_token()[1]

        peek_at_token = self.tokenizer.peek_at_next_token()[1]

        # anything other than '(' after the name: step onto that token
        # (the '.') before compiling the call
        if peek_at_token != START_OF_PARAM_LIST:
            self.tokenizer.advance()

        self.compile_call(rout_or_class_name)

        # now comes ;
        self.tokenizer.advance()

        # the returned value is not used by a do statement: pop it to temp 0
        self.VMWriter.write_pop(TEMP, ZERO_NUM)

    def compile_let(self) -> None:
        """
        compiles a let statement
        --------------------
        let  "var_name" = "expression" ;
        let  "var_name" [ "expression" ] = "expression" ;
        --------------------
        :return: None
        """
        # the caller may only have peeked at 'let'; if the cursor is not
        # on it yet, step onto it
        current_token = self.tokenizer.get_current_token()[1]
        if current_token != LET:
            self.tokenizer.advance()
            current_token = self.tokenizer.get_current_token()[1]
        not_array_flag = True

        # should be varName, might be varName []
        # was let
        self.tokenizer.advance()
        var_name = self.tokenizer.get_current_token()[1]

        # now var name

        # was var name
        self.tokenizer.advance()
        current_token = self.tokenizer.get_current_token()[1]
        # now  =  or [

        if current_token == ARRAY_OPENER:
            # array target: emit the code computing the element address
            not_array_flag = False
            self.calculate_memory_location(var_name)

        # were on =
        self.tokenizer.advance()
        # now on expression

        # right-hand side
        self.compile_expression()

        # after expression
        # comes;
        self.tokenizer.advance()

        if not_array_flag:
            # not array, we pop variable
            variable = self.get_variable_of_table(var_name)
            var_kind = variable[KIND]

            segment = SymbolTable.get_segment(var_kind)
            var_index = variable[INDEX]
            self.VMWriter.write_pop(segment, var_index)
        else:
            # array: the value is on top of the stack with the element
            # address below it; park the value in temp 0 while the address
            # is moved into pointer 1
            # pop temp 0
            self.VMWriter.write_pop(TEMP, ZERO_NUM)
            # pop pointer 1
            self.VMWriter.write_pop(POINTER, ONE_NUM)
            # push temp 0
            self.VMWriter.write_push(TEMP, ZERO_NUM)
            # pop that 0
            self.VMWriter.write_pop(THAT, ZERO_NUM)

    def calculate_memory_location(self, var_name):
        """
        method to emit the code that computes the address of an array
        element: the index expression is compiled, the array variable is
        pushed, and the two are added.

        Called with the cursor on "[".
        :param var_name: name of the array variable
        :return: None
        """
        # look up the array variable
        variable = self.get_variable_of_table(var_name)

        var_kind = variable[KIND]

        segment = SymbolTable.get_segment(var_kind)
        var_index = variable[INDEX]

        # after [
        self.tokenizer.advance()

        # expression inside array
        self.compile_expression()

        # push the array variable itself
        self.VMWriter.write_push(segment, var_index)
        # write add to add memory places
        self.VMWriter.write_arithmetic(ADD)

        # were on the last token of the index expression
        self.tokenizer.advance()
        # now on ]

        # were on ]
        self.tokenizer.advance()
        # presumably now on '=' - compile_let advances once more before
        # compiling the right-hand side
        # now on expression

    def compile_while(self):
        """
        compiles a while statement
        --------------------
        while  ( "expression" )
        { "statements }
        --------------------
        Emitted shape:
            label L1
            <expression>, negated
            if-goto L2
            <statements>
            goto L1
            label L2
        :return: None
        """

        # the caller may only have peeked at 'while'; if the cursor is not
        # on it yet, step onto it
        current_token = self.tokenizer.get_current_token()[1]
        if current_token != WHILE:
            self.tokenizer.advance()
            current_token = self.tokenizer.get_current_token()[1]

        # label L1
        while_label = self.label_generator()
        self.VMWriter.write_label(while_label)

        # while
        self.tokenizer.advance()
        # (
        self.tokenizer.advance()

        # expression of while
        self.compile_expression()

        # ~(cond)
        # negate condition so the jump below is taken when it is false
        negate = BINARY_DICT["~"]
        self.VMWriter.write_arithmetic(negate)
        #  --------------------  #

        # )
        self.tokenizer.advance()

        # if-goto L2
        after_while_label = self.label_generator()
        self.VMWriter.write_if(after_while_label)

        # {
        self.tokenizer.advance()

        # statement
        self.tokenizer.advance()

        self.compile_statements()

        # goto L1
        self.VMWriter.write_goto(while_label)

        # label L2
        self.VMWriter.write_label(after_while_label)

        # }
        self.tokenizer.advance()

    def compile_return(self) -> None:
        """
        compiles a return statement.

        When the token after ``return`` is COMMA_DOT there is no value, so
        constant 0 is pushed before the return command; otherwise the
        expression is compiled first.
        :return: None
        """
        # the caller may only have peeked at 'return'; if the cursor is
        # not on it yet, step onto it
        current_token = self.tokenizer.get_current_token()[1]
        if current_token != RETURN:
            self.tokenizer.advance()
            current_token = self.tokenizer.get_current_token()[1]

        value_to_return = self.tokenizer.peek_at_next_token()[1]

        if value_to_return == COMMA_DOT:
            # no value to return: step onto the terminator and push 0
            self.tokenizer.advance()
            self.VMWriter.write_push(CONSTANT, ZERO_NUM)
            self.VMWriter.write_return()
            return

        # evaluate return value
        self.tokenizer.advance()
        self.compile_expression()
        self.VMWriter.write_return()

        # ;
        self.tokenizer.advance()

    def compile_if(self):
        """
        compiles an if statement
        possibly with a trailing else clause
        --------------------
        if  ( "expression" )
        { "statements }
        - might be
        else {
        }
        --------------------
        Emitted shape:
            <expression>, negated
            if-goto L1
            <if statements>
            goto L2
            label L1
            <else statements, when present>
            label L2
        :return: None
        """

        # the caller may only have peeked at 'if'; if the cursor is not on
        # it yet, step onto it
        current_token = self.tokenizer.get_current_token()[1]
        if current_token != IF:
            self.tokenizer.advance()
            current_token = self.tokenizer.get_current_token()[1]

        # L1: start of the else part, L2: end of the whole statement
        L1 = self.label_generator()
        L2 = self.label_generator()

        # was if now (
        self.tokenizer.advance()

        # cond
        # the cursor is on '(' here, so the condition is compiled through
        # compile_term's parenthesised-expression branch
        self.compile_expression()

        # ~(cond)
        # negate condition
        negate = BINARY_DICT["~"]
        self.VMWriter.write_arithmetic(negate)
        #  --------------------  #

        # )
        self.tokenizer.advance()

        # if-goto L1
        self.VMWriter.write_if(L1)
        #  --------------------  #

        # {
        self.tokenizer.advance()

        # statements of the if branch

        # VM code for s1
        self.compile_statements()
        #  --------------------  #

        # goto L2
        self.VMWriter.write_goto(L2)
        #  --------------------  #

        # }
        self.tokenizer.advance()

        # now we might have else: it may be the current token or the next
        # one, so both are inspected
        current_token = self.tokenizer.get_current_token()[1]
        current_peek = self.tokenizer.peek_at_next_token()[1]

        # label L1
        self.VMWriter.write_label(L1)
        #  --------------------  #

        # statements 2 is else :
        # (| on two booleans: both sides are always evaluated)
        if (current_peek == ELSE) | (current_token == ELSE):
            if current_peek == ELSE:
                # else is still ahead: step onto it first
                self.tokenizer.advance()
            # now else
            self.tokenizer.advance()
            # {
            self.tokenizer.advance()

            self.compile_statements()

            # }
            self.tokenizer.advance()

        # label L2
        self.VMWriter.write_label(L2)
        #  --------------------  #

    def compile_expression(self) -> None:
        """
        compiles an expression
        --------------------
        term (op term)*
        --------------------
        Operators are applied strictly left to right: each operator's
        command is written right after its right-hand term, with no
        precedence handling.
        :return: None
        """

        # first term
        self.compile_term()

        peek_at_token = self.tokenizer.peek_at_next_token()[1]

        while peek_at_token in BINARY_OPERATORS:
            # binary op
            self.tokenizer.advance()
            # NOTE(review): read but not used; the lookup below uses the
            # peeked token instead
            operation = self.tokenizer.get_current_token()[1]

            # step onto the first token of the right-hand term
            self.tokenizer.advance()

            # compile term
            self.compile_term()

            # operator command goes after both operands
            arithmetic_command = BINARY_DICT[peek_at_token]
            self.VMWriter.write_arithmetic(arithmetic_command)

            # renew again
            peek_at_token = self.tokenizer.peek_at_next_token()[1]

    def compile_term(self) -> None:
        """
        compiles a term.

        Branches, in the order they are tested:
        - integer constant
        - string constant
        - keyword constant (TRUE, FALSE, THIS, NULL)
        - unary operator followed by a term
        - parenthesised expression
        - anything else, decided by the next token:
          array entry "[", subroutine call "(", qualified call ".",
          or a plain variable
        :return: None
        """

        # get current token we insert
        current_token = self.tokenizer.get_current_token()
        token_type = current_token[0]
        token_string = current_token[1]

        # integerConstant
        if token_type == JackTokenizer.INT_TYPE:
            self.VMWriter.write_push(CONSTANT, token_string)

        # stringConstant
        elif token_type == JackTokenizer.STRING_TYPE:
            # construction of string inside
            self.construct_string(token_string)

        # keywordConstant
        elif token_type == JackTokenizer.KEYWORD_TYPE:
            if token_string == TRUE:
                # true: push 0, then apply the "~" command to it
                self.VMWriter.write_push(CONSTANT, ZERO_NUM)
                neg_op = BINARY_DICT["~"]
                self.VMWriter.write_arithmetic(neg_op)
            # NOTE(review): this `if` starts a second chain rather than
            # continuing the TRUE test above
            if token_string == FALSE:
                self.VMWriter.write_push(CONSTANT, ZERO_NUM)
            elif token_string == THIS:
                self.VMWriter.write_push(POINTER, ZERO_NUM)
            elif token_string == NULL:
                self.VMWriter.write_push(CONSTANT, ZERO_NUM)

        # unaryOperator {- , ~}
        elif token_string in UNARY_OPERATORS:

            # operator to print after expression

            # unary minus is looked up under the key "!" so it does not
            # collide with the binary "-" entry
            if token_string == "-":
                token_string = "!"

            op = BINARY_DICT[token_string]

            self.tokenizer.advance()

            # compile the operand of the operator
            self.compile_term()
            # neg if -
            # not if ~
            self.VMWriter.write_arithmetic(op)

        # ( -> some expression -> )
        elif token_string == START_OF_PARAM_LIST:
            # (
            self.tokenizer.advance()
            # inner expression
            self.compile_expression()
            # step onto )
            self.tokenizer.advance()

        else:
            # was some identifier
            possibly_parent = self.tokenizer.peek_at_next_token()[1]
            # the next token decides what the identifier is

            # pretty much straight forward:
            # 1. array opener []
            # 2. expression opener () # function call
            # 3. className. -> and then # 2. call of subroutineName()
            # 4. simple varName
            if possibly_parent == ARRAY_OPENER:
                # step onto [ before handing over
                self.tokenizer.advance()
                self.array_variable(token_string)
            elif possibly_parent == START_OF_PARAM_LIST:
                # subroutine call immediately; the cursor stays on the
                # subroutine name
                self.compile_call(token_string)
            elif possibly_parent == DOT:
                # step onto .
                self.tokenizer.advance()
                # we have a possible className in token_string
                # now we will have a subroutine name and call
                self.compile_call(token_string)
            else:
                self.simple_variable(token_string)

    def simple_variable(self, var_name) -> None:
        """
        push the value of a plain variable.

        The variable is looked up in the symbol tables; its kind is
        mapped to a segment and pushed together with its index.
        :param var_name: var name we push
        :return: None
        """
        entry = self.get_variable_of_table(var_name)
        target_segment = SymbolTable.get_segment(entry[KIND])
        self.VMWriter.write_push(target_segment, entry[INDEX])

    def array_variable(self, var_name):
        """
        push the value of an array element ``var_name[expression]``.

        Called with the cursor on "["; leaves it one token past the
        index expression.
        :param var_name: name of the array variable
        :return: None
        """
        entry = self.get_variable_of_table(var_name)
        base_segment = SymbolTable.get_segment(entry[KIND])
        base_index = entry[INDEX]

        # step over '[' and compile the index expression
        self.tokenizer.advance()
        self.compile_expression()

        writer = self.VMWriter
        # array variable + index
        writer.write_push(base_segment, base_index)
        writer.write_arithmetic(ADD)
        # pop the sum into pointer 1, then push that 0
        writer.write_pop(POINTER, ONE_NUM)
        writer.write_push(THAT, ZERO_NUM)

        # step onto the closing ']'
        self.tokenizer.advance()

    def compile_expression_list(self) -> int:
        """
        compiles (might be empty list) a comma separated
        list of expressions.

        Called with the cursor on "("; stops on the first token after an
        argument that is not a comma.
        :return: amount of expressions
        """
        # NOTE(review): this first read is overwritten below before use
        current_token = self.tokenizer.get_current_token()[1]
        # we are on (
        self.tokenizer.advance()
        # now we on ) or argument

        arguments_count = 0

        # we start unless we are already at ")"
        # just like with param list

        # or arg or )
        current_token = self.tokenizer.get_current_token()[1]

        if current_token != END_OF_PARAM_LIST:
            arguments_count += 1

            # compiling argument
            self.compile_expression()

            # step past the last token of the expression
            self.tokenizer.advance()

            # renew current token
            current_token = self.tokenizer.get_current_token()[1]

            while current_token == COMMA:
                # was , -> now the next argument
                self.tokenizer.advance()

                # now new argument
                arguments_count += 1
                self.compile_expression()
                # step past the last token of the expression
                self.tokenizer.advance()
                # and go again, renew current token
                current_token = self.tokenizer.get_current_token()[1]

        return arguments_count

    def label_generator(self) -> str:
        """
        helper method
        build a label from the LABEL prefix and the running counter,
        then bump the counter so the next label differs
        :return: str of new label
        """
        index = self.label_counter
        fresh_label = LABEL + str(index)
        self.label_counter = index + 1
        return fresh_label

    def construct_string(self, token_string):
        """
        emit the code that builds a string constant.

        The first and last character of ``token_string`` are dropped; the
        remaining length is pushed and STRING_ALLOC_METHOD is called with
        one argument, then each character's ``ord`` value is pushed and
        STRING_APPENDING is called with two arguments.
        :param token_string: string token including its delimiters
        :return: None
        """
        text = token_string[1:-1]
        writer = self.VMWriter

        # allocate a string sized for the text
        writer.write_push(CONSTANT, len(text))
        writer.write_call(STRING_ALLOC_METHOD, ONE_NUM)

        # append the characters one by one
        for code_point in map(ord, text):
            writer.write_push(CONSTANT, code_point)
            writer.write_call(STRING_APPENDING, TWO_NUM)

    def compile_call(self, rout_or_class_name) -> None:
        """
        method to compile a subroutine call.

        Three shapes are handled:
        - ``var.sub(...)``: ``var`` is in the symbol table, so it is pushed
          and the call targets ``<type of var>.sub`` with one extra argument
        - ``Name.sub(...)``: ``Name`` is not in the symbol table, so the
          call targets ``Name.sub`` with no extra argument
        - ``sub(...)``: ``pointer 0`` is pushed and the call targets
          ``<current class>.sub`` with one extra argument

        The cursor is expected on "." for the first two shapes and on the
        subroutine name for the third.
        :param rout_or_class_name:  name of class, variable or subroutine
        :return: none
        """
        variable = self.get_variable_of_table(rout_or_class_name)

        if variable is not None:
            # a known variable: the call goes to its declared type, and
            # the variable itself is pushed first
            rout_or_class_name = variable[TYPE]
            subroutine_type = variable[TYPE]
            var_index = variable[INDEX]
            var_kind = SymbolTable.get_segment(variable[KIND])
            self.VMWriter.write_push(var_kind, var_index)
        else:
            subroutine_type = None

        # . or subroutine name
        current_token = self.tokenizer.get_current_token()[1]
        if current_token == DOT:
            # it is a call for a className.subroutineName

            # was .
            self.tokenizer.advance()
            # now subroutine name

            # subroutine_name
            subroutine_name = self.tokenizer.get_current_token()[1]

            # Class.Subroutine
            subroutine_name = rout_or_class_name + DOT + subroutine_name
        else:
            # a bare subroutine name: call it on the current object
            self.VMWriter.write_push(POINTER, ZERO_NUM)
            subroutine_name = self.current_class_name + DOT + rout_or_class_name
            subroutine_type = METHOD

        # (| on two booleans: both sides are always evaluated)
        if (subroutine_type is None) | (subroutine_type == VOID):
            # no object was pushed for this call
            arguments = 0
        else:
            # one object was pushed above and counts as an argument
            arguments = 1

        # start of expression list
        # ------------------------
        # was subroutine name
        self.tokenizer.advance()
        # now (

        # compilation of expression list
        arguments = arguments + self.compile_expression_list()

        # -------------------- #
        # end of expression list
        # -------------------- #

        # call subroutine_name arguments
        self.VMWriter.write_call(subroutine_name, arguments)

    def get_variable_of_table(self, var_name):
        """
        method to get variable of one of tables
        :param var_name: var name to get
        :return: dict of variable
        """
        variable = None
        # if in both
        if (var_name in self.symbol_table.variable_table.keys()) & \
                (var_name in self.symbol_table.subroutine_table.keys()):
            variable = self.symbol_table.subroutine_table[var_name]
        elif var_name in self.symbol_table.variable_table.keys():
            variable = self.symbol_table.variable_table[var_name]
        elif var_name in self.symbol_table.subroutine_table.keys():
            variable = self.symbol_table.subroutine_table[var_name]
        return variable
from Tokenizer import tokenType, tokenWrap, insideAstring

from Parser import Parser

import glob

import os

# Names of the grammar elements listed as non-terminals
# (presumably the ones the parser wraps in their own tags -- confirm in Parser).
nonTerminals = [
    'class', 'classVarDec', 'subroutineDec', 'parameterList', 'subroutineBody',
    'varDec', 'statements', 'whileStatement', 'ifStatement', 'returnStatement',
    'letStatement', 'doStatement', 'expression', 'term', 'expressionList'
]

from SymbolTable import SymbolTable, Node, LinkedList
# Module-level symbol table, pre-populated below with a few OS-level routines.
parserSymbolTable = SymbolTable()
# OS and other functions.
# Call shape used inside the compiler, kept for reference:
#defineSubroutineTracker(self.subroutineName,'method' ,self.className,self.subroutineVoid)
# NOTE(review): the second argument here is '0'/'1' rather than a kind such as
# 'method'; it looks like an argument count -- confirm against SymbolTable.
parserSymbolTable.defineSubroutineTracker('deAlloc', '1', 'OS', True)  # void
parserSymbolTable.defineSubroutineTracker('keyPressed', '0', 'OS',
                                          False)  # not void
parserSymbolTable.defineSubroutineTracker('wait', '1', 'OS', True)  # void
# Methods take k+1 arguments: the extra one is the object itself
# ('argument 0' / 'pointer 0').
# The registrations below are disabled.
#parserSymbolTable.define('moveUp','1','OS')
#parserSymbolTable.define('moveDown','1','OS')
#parserSymbolTable.define('moveLeft','1','OS')
#parserSymbolTable.define('moveRight','1','OS')
#parserSymbolTable.define('incSize','1','OS')
#parserSymbolTable.define('dispose','1','OS')
#parserSymbolTable.define('decSize','1','OS')
parserSymbolTable.define('new', '1', 'OS')
Example #42
0
class TypeChecker(NodeVisitor):
    """AST visitor that type-checks a program and reports problems on stdout.

    Every ``visit_*`` method returns the type computed for its node (a type
    name, a ``Matrix``, a ``VariableSymbol`` or ``BadType()`` after an error)
    or ``None`` for statements.  ``self.errors`` becomes ``True`` as soon as
    any error has been reported.

    Scalar typing rules come from the module-level ``result_types`` table,
    indexed as ``result_types[operator][left][right]``; matrices are keyed
    there by their class name.
    """

    def __init__(self):
        self.symbol_table = SymbolTable(None, "global")
        self.loop_nest = 0  # number of loops currently being visited
        self.errors = False

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _error(self, line, message, prefix="Error in line: "):
        """Print ``prefix + line + ": " + message`` and flag the failure."""
        print(prefix + str(line) + ": " + message)
        self.errors = True

    def _illegal_operation(self, line, left, operator, right):
        """Report an illegal binary operation and return ``BadType()``."""
        self._error(
            line, "illegal operation " + str(left) + " " + str(operator) +
            " " + str(right))
        return BadType()

    @staticmethod
    def _unwrap(value):
        """Return the type carried by a VariableSymbol, else *value* itself."""
        if isinstance(value, VariableSymbol):
            return value.type
        return value

    @staticmethod
    def _table_key(type_):
        """Return the key under which *type_* is looked up in result_types."""
        if isinstance(type_, Matrix):
            return type_.__class__.__name__
        return type_

    def _enter_scope(self, name):
        """Make a fresh child scope called *name* the current scope."""
        self.symbol_table = SymbolTable(self.symbol_table, name)

    def _leave_scope(self):
        """Return to the parent of the current scope."""
        self.symbol_table = self.symbol_table.getParentScope()

    def _visit_square_initialization(self, node, name):
        """Shared check for zeros/ones/eye: the argument must be an int.

        :param name: function name used in the error message
        :return: square ``Matrix`` type, or ``BadType()`` after an error
        """
        expr_type = self.visit(node.expression)
        if isinstance(expr_type, VariableSymbol):
            variable_type = expr_type.type
            if variable_type != 'int':
                # str(): the type may be a Matrix object, not a string
                self._error(
                    node.line, "cannot initialize " + name + " with " +
                    str(variable_type))
                return BadType()
        elif expr_type != 'int':
            self._error(
                node.line,
                "cannot initialize " + name + " with this expression")
            return BadType()
        dim = self.get_dim(node.expression)
        return Matrix(dim, dim)

    # ------------------------------------------------------------------
    # expressions
    # ------------------------------------------------------------------

    def verify_matrices(self, operator, type_left, type_right, line):
        """Check that two matrix types are compatible for *operator*.

        ``*``/``*=`` need left ``dim_Y`` == right ``dim_X``; ``/``/``/=``
        additionally need a square right operand; every other operator needs
        identical dimensions.

        :return: ``Matrix(type_left.dim_X, type_right.dim_Y)`` or
                 ``BadType()`` after reporting an error
        """
        problem = None
        if operator == '*' or operator == '*=':
            if type_left.dim_Y != type_right.dim_X:
                problem = ("illegal operation: left matrix columns != "
                           "right matrix rows.")
        elif operator == '/' or operator == '/=':
            if type_right.dim_X != type_right.dim_Y:
                problem = "illegal operation: right matrix is not invertible."
            elif type_left.dim_Y != type_right.dim_X:
                problem = ("illegal operation: left matrix columns != "
                           "right matrix rows.")
        elif (type_left.dim_X != type_right.dim_X
              or type_left.dim_Y != type_right.dim_Y):
            problem = "illegal operation on different matrix size"

        if problem is not None:
            self._error(line, problem)
            return BadType()
        return Matrix(type_left.dim_X, type_right.dim_Y)

    def visit_Program(self, node):
        self.visit(node.instructions)

    def visit_Instructions(self, node):
        for instruction in node.instructions:
            self.visit(instruction)

    def visit_BinaryExpression(self, node):
        """Type a binary expression.

        Operands may arrive as raw types or as ``VariableSymbol`` objects;
        both are reduced to the underlying type before any lookup so that a
        variable and a literal of the same type behave alike.
        """
        type_left = self.visit(node.expression_left)
        type_right = self.visit(node.expression_right)
        operator = node.operator

        left = self._unwrap(type_left)
        right = self._unwrap(type_right)

        # scalar (or unknown) left operand: plain table lookup
        if not isinstance(left, Matrix):
            expected_type = result_types[operator][left][
                self._table_key(right)]
            if not expected_type:
                return self._illegal_operation(node.line, type_left, operator,
                                               type_right)
            return expected_type

        # matrix with matrix: dimensions decide
        if isinstance(right, Matrix):
            return self.verify_matrices(operator, left, right, node.line)

        # matrix on the left, non-matrix on the right
        left_is_variable = isinstance(type_left, VariableSymbol)
        if left_is_variable and operator not in ('*', '/'):
            return self._illegal_operation(node.line, type_left, operator,
                                           type_right)
        if not left_is_variable and isinstance(type_right, VariableSymbol):
            self._error(node.line,
                        "illegal operation on different matrix size")
            return BadType()

        # Matrix-with-scalar is decided by the table instead of being handed
        # to verify_matrices, which needs dimensions on both operands.
        expected_type = result_types[operator][left.__class__.__name__][
            self._table_key(right)]
        if not expected_type:
            if left_is_variable:
                return self._illegal_operation(node.line, type_left, operator,
                                               type_right)
            self._error(node.line,
                        "illegal operation on different matrix size")
            return BadType()
        return expected_type

    def visit_NegUnaryExpression(self, node):
        """Type a unary minus; the (falsy) table result is returned on error."""
        operand = self._unwrap(self.visit(node.expression))
        if isinstance(operand, str):
            key = operand
        else:
            key = operand.__class__.__name__
        result = result_types['-'][key]
        if not result:
            self._error(node.line, "invalid unary negation type")
        return result

    def visit_TransUnaryExpression(self, node):
        """Type a transposition; the (falsy) table result is returned on error."""
        operand = self._unwrap(self.visit(node.expression))
        result = result_types['\''][operand.__class__.__name__]
        if not result:
            self._error(node.line, "invalid transposition type")
        return result

    def visit_Variable(self, node):
        """Return the symbol stored for the variable, or None if unknown."""
        definition = self.symbol_table.get(node.name)
        if definition is None:
            self._error(node.line, "unknown variable")
            return None
        return definition

    def visit_Constant(self, node):
        return node.type

    def visit_CompoundInstruction(self, node):
        self.visit(node.instructions)

    # ------------------------------------------------------------------
    # assignments
    # ------------------------------------------------------------------

    def visit_Assignment(self, node):
        """Check ``variable = expression`` and (re)declare the variable."""
        expr_type = self.visit(node.expression)

        if isinstance(node.variable, MatrixElement):
            var = self.symbol_table.get(node.variable.variable)
            if var is None:
                self._error(node.line, "no matrix with that name",
                            prefix="Error in line ")
            else:
                self.visit(node.variable)
            return

        var = self.symbol_table.get(node.variable.name)
        if var is not None and str(var) != str(expr_type):
            print("Warning in line " + str(node.line) +
                  ": previously declared variable, type: " + str(var) +
                  " now reassigning with type: " + str(expr_type))

        # Store the underlying type: wrapping a VariableSymbol in another
        # VariableSymbol would hide it from every later isinstance/type check.
        self.symbol_table.put(
            node.variable.name,
            VariableSymbol(node.variable.name, self._unwrap(expr_type)))

        self.visit(node.variable)

    def visit_CompoundAssignment(self, node):
        """Check ``variable op= expression`` and return the resulting type."""
        expression = self.visit(node.expression)
        operator = node.operator

        if isinstance(node.variable, MatrixElement):
            var = self.symbol_table.get(node.variable.variable)
            if var is None:
                self._error(node.line, "no matrix with that name",
                            prefix="Error in line ")
            else:
                self.visit(node.variable)
            return None

        variable = self.symbol_table.get(node.variable.name)
        if variable is None:
            # the target was never declared; nothing to combine with
            self._error(node.line, "unknown variable")
            return BadType()

        if not isinstance(variable.type, Matrix):
            if isinstance(expression, VariableSymbol):
                expected_type = result_types[operator][variable.type][
                    expression.type]
            else:
                expected_type = result_types[operator][variable.type][
                    expression]
            if not expected_type:
                return self._illegal_operation(node.line, variable, operator,
                                               expression)
            return expected_type

        if not isinstance(expression, VariableSymbol):
            expected_type = result_types[operator][
                variable.type.__class__.__name__][expression]
            if not expected_type:
                return self._illegal_operation(node.line, variable, operator,
                                               expression)
            return expected_type

        if not isinstance(expression.type, Matrix):
            return self._illegal_operation(node.line, variable, operator,
                                           expression)
        return self.verify_matrices(operator, variable.type, expression.type,
                                    node.line)

    # ------------------------------------------------------------------
    # matrices
    # ------------------------------------------------------------------

    def visit_MatrixElement(self, node):
        """Check a ``matrix[row, column]`` reference.

        Returns ``BadType()`` for a non-int or out-of-range index and
        ``None`` otherwise (including the "not a matrix" error).
        """
        row_type = self.visit(node.row)
        column_type = self.visit(node.column)

        if not (row_type == 'int' and column_type == 'int'):
            self._error(node.line, "index is not int")
            return BadType()

        target = self.symbol_table.get(node.variable)
        if isinstance(target, VariableSymbol) and isinstance(
                target.type, Matrix):
            shape = target.type
        elif isinstance(target, Matrix):
            shape = target
        else:
            self._error(node.line, "this is not a matrix")
            return None

        if node.row.value >= shape.dim_Y or node.column.value >= shape.dim_X:
            self._error(node.line, "index out of bound")
            return BadType()
        return None

    def visit_ListsOfExpressions(self, node):
        """Type a matrix literal; all rows must have the same length."""
        size = -1
        for expression_list in node.expression_lists:
            next_size = self.visit(expression_list)
            if size == -1:
                size = next_size
            if size != next_size:
                self._error(
                    node.line, "Different rows size " + str(size) + " and " +
                    str(next_size))
                return BadType()
        return Matrix(len(node.expression_lists), size)

    def visit_MatrixAssignment(self, node):
        var = self.symbol_table.get(node.variable.name)
        if var is not None:
            print(
                "Warning in line " + str(node.line) +
                ": previously declared variable, now reassigning with type: " +
                str(Matrix.__name__))
        matrix = self.visit(node.expression_list)
        self.symbol_table.put(node.variable.name,
                              VariableSymbol(node.variable.name, matrix))

    def visit_PrintExpression(self, node):
        for expression in node.expression_list:
            self.visit(expression)

    def visit_ListOfExpressions(self, node):
        """Visit every element and return how many there are."""
        for expression in node.expression_list:
            self.visit(expression)
        return len(node.expression_list)

    def visit_PrintInstructions(self, node):
        self.visit(node.expressions_list)

    def visit_ZerosInitialization(self, node):
        return self._visit_square_initialization(node, "zeros")

    def visit_OnesInitialization(self, node):
        return self._visit_square_initialization(node, "ones")

    def visit_EyeInitialization(self, node):
        return self._visit_square_initialization(node, "eye")

    # ------------------------------------------------------------------
    # control flow
    # ------------------------------------------------------------------

    def visit_BreakInstruction(self, node):
        if self.loop_nest <= 0:
            self._error(node.line, "break outside the loop")
        return None

    def visit_ContinueInstruction(self, node):
        if self.loop_nest <= 0:
            self._error(node.line, "continue outside the loop")
        return None

    def visit_IfInstruction(self, node):
        self.visit(node.condition)
        self._enter_scope("if")
        self.visit(node.instruction)
        self._leave_scope()

    def visit_IfElseInstruction(self, node):
        self.visit(node.condition)
        self._enter_scope("if")
        self.visit(node.instruction)
        self._leave_scope()
        self._enter_scope("else")
        self.visit(node.else_instruction)
        self._leave_scope()

    def visit_WhileInstruction(self, node):
        self.loop_nest = self.loop_nest + 1
        self._enter_scope('while' + str(self.loop_nest))
        self.visit(node.condition)
        self.visit(node.instruction)
        self._leave_scope()
        self.loop_nest = self.loop_nest - 1

    def visit_ForInstruction(self, node):
        """Check both range bounds and declare the loop variable."""
        self.loop_nest = self.loop_nest + 1
        self._enter_scope('for' + str(self.loop_nest))

        range_type = None
        for bound in (node.start, node.end):
            range_type = self.visit(bound)
            if str(range_type) != 'int':
                self._error(node.line,
                            "invalid range type: " + str(range_type))

        # Declare the loop variable the same way assignments do, so code that
        # reads ``.type`` from a looked-up symbol also works for it.
        self.symbol_table.put(
            node.variable.name,
            VariableSymbol(node.variable.name, self._unwrap(range_type)))
        self.visit(node.instruction)
        self._leave_scope()
        self.loop_nest = self.loop_nest - 1

    def visit_ReturnInstruction(self, node):
        """Type the returned expression; a return in the global scope is an error."""
        returned = self.visit(node.expression)
        if self.symbol_table.getParentScope() is None:
            self._error(node.line, "return in outer of scope")
            return BadType()
        return returned

    def get_dim(self, val):
        """Return the dimension denoted by *val*.

        A ``Constant`` yields its value, a ``Variable`` its *name* (the value
        is not known statically), an ``int`` itself; anything else ``None``.
        """
        if isinstance(val, Constant):
            return val.value
        elif isinstance(val, Variable):
            return val.name
        elif isinstance(val, int):
            return val
        return None
Example #43
0
 def visit_CompoundInstr(self, node):
     """Visit a compound instruction inside its own "inner" scope.

     The declarations are visited before the instructions; afterwards the
     current scope is reset to the parent of the inner scope.
     """
     enclosing_scope = self.symbol_table
     self.symbol_table = SymbolTable(enclosing_scope, "inner")
     for part in (node.declarations, node.instructions_opt):
         self.visit(part)
     self.symbol_table = self.symbol_table.getParentScope()
Example #44
0
 def __init__(self):
     """Start with an empty "global" scope, no enclosing loop and no errors."""
     global_scope = SymbolTable(None, "global")
     self.symbol_table = global_scope
     self.loop_nest, self.errors = 0, False
Example #45
0
class TypeChecker(object):
    """Visitor that type-checks an AST and prints every error it finds.

    ``self.ttype[operator][left][right]`` holds the result type of applying
    *operator* to operands of the given types; a missing entry means the
    combination is illegal.  The pseudo-operator ``'f'`` describes which
    value type (right) may be stored in / passed to which declared type
    (left).  ``self.errorsOcurred`` is set once any error was reported.
    """

    def __init__(self):
        self.errorsOcurred = False
        operators = ['+', '-', '*', '/', '%', '|', '&', '^', '&&', '||',
                     '<<', '>>', '==', '!=', '<', '>', '<=', '>=', 'f']
        types = ['int', 'float', 'string']
        numeric = ('int', 'float')
        self.ttype = dict(
            (op, dict((t, {}) for t in types)) for op in operators)

        # arithmetic: int op int stays int, any float operand gives float
        for op in ('+', '-', '*', '/'):
            for left in numeric:
                for right in numeric:
                    if left == 'int' and right == 'int':
                        self.ttype[op][left][right] = 'int'
                    else:
                        self.ttype[op][left][right] = 'float'
        self.ttype['+']['string']['string'] = 'string'

        # modulo, bitwise, logical and shift operators: ints only
        for op in ('%', '|', '&', '^', '&&', '||', '<<', '>>'):
            self.ttype[op]['int']['int'] = 'int'

        # comparisons: numbers with numbers or string with string, result int
        for op in ('==', '!=', '<', '>', '<=', '>='):
            for left in numeric:
                for right in numeric:
                    self.ttype[op][left][right] = 'int'
            self.ttype[op]['string']['string'] = 'int'

        # 'f': assignment / argument passing (declared type <- value type)
        self.ttype['f']['string']['string'] = 'string'
        self.ttype['f']['int']['int'] = 'int'
        self.ttype['f']['float']['float'] = 'float'
        self.ttype['f']['float']['int'] = 'float'

    def error(self, text, line):
        """Print an error banner for *line* and remember the failure."""
        self.errorsOcurred = True
        # single-argument print(...) behaves the same on Python 2 and 3
        print("********************************")
        print("Error: " + text)
        print("Line " + str(line))
        print("********************************")

    def visit_Program(self, node):
        """Check the whole program inside a fresh 'main' scope."""
        try:
            self.symbolTable = SymbolTable(None, 'main')
            node.declarations.accept(self)
            node.fundefs.accept(self)
            node.instructions.accept(self)
        except Exception:
            # best effort: an earlier error left the checker unable to go on
            self.error("could not continue parsing, correct errors first", 0)

    def visit_Declarations(self, node):
        for element in node.list:
            element.accept(self)

    def visit_Declaration(self, node):
        """Declare every initialised symbol of one declaration statement."""
        declType = node.type
        for exprType, ident in node.inits.accept(self):
            if self.symbolTable.get(ident.value) is not None:
                self.error(
                    "Symbol: " + ident.value + ", was previusly declared",
                    ident.line)
            try:
                self.ttype['f'][declType][exprType]
            except (KeyError, TypeError):
                self.error(
                    "cannot initialize symbol of type: " + str(declType) +
                    ", with expression of type: " + str(exprType),
                    ident.line)
            # the symbol has the declared type, not the initializer's type
            self.symbolTable.put(ident.value, declType)

    def visit_Inits(self, node):
        """Return a list of ``[expression type, id node]`` pairs."""
        return [element.accept(self) for element in node.list]

    def visit_Init(self, node):
        return [node.expression.accept(self), node.id]

    def visit_Instructions(self, node):
        """Visit the instructions inside their own scope."""
        self.symbolTable = SymbolTable(self.symbolTable, 'instructions')
        for element in node.list:
            element.accept(self)
        self.symbolTable = self.symbolTable.getParentScope()

    def visit_PrintInstr(self, node):
        if node.expression.accept(self) not in ['string', 'int', 'float']:
            self.error("cannot print expression of that type", node.line)

    def visit_LabeledInstr(self, node):
        node.instruction.accept(self)

    def visit_Assignment(self, node):
        """Check that the assigned value fits the target's type."""
        if self.symbolTable.getIncludingParents(node.id.value) is None:
            self.error("unknown symbol name: " + node.id.value, node.id.line)
            return
        idType = node.id.accept(self)
        exprType = node.expression.accept(self)
        try:
            self.ttype['f'][idType][exprType]
        except (KeyError, TypeError):
            # str(): either type may be None after an earlier error
            self.error(
                "cannot assign " + str(exprType) + " to " + str(idType),
                node.id.line)

    def visit_ChoiceInstr(self, node):
        node.condition.accept(self)
        node.instruction.accept(self)
        # an ``if`` without ``else`` has nothing more to visit
        if node.elseInstruction is not None:
            node.elseInstruction.accept(self)

    def visit_Break(self, node):
        pass

    def visit_Continue(self, node):
        pass

    def visit_WhileInstr(self, node):
        """Check the condition, then the body inside a 'while' scope."""
        node.condition.accept(self)
        self.symbolTable = SymbolTable(self.symbolTable, 'while')
        node.instruction.accept(self)
        self.symbolTable = self.symbolTable.getParentScope()

    def visit_RepeatInstr(self, node):
        node.instructions.accept(self)
        node.condition.accept(self)

    def visit_ReturnInstr(self, node):
        # TODO: compare against the enclosing function's return type
        node.expression.accept(self)

    def visit_CompoundInstr(self, node):
        # no scope of its own: the caller already opened one
        node.declarations.accept(self)
        node.instructions.accept(self)

    def visit_Condition(self, node):
        # compare for equality; ``in ('int')`` would be a substring test
        if node.expression.accept(self) != 'int':
            self.error("condition must be of int type", node.line)

    def visit_Integer(self, node):
        return 'int'

    def visit_Float(self, node):
        return 'float'

    def visit_String(self, node):
        return 'string'

    def visit_Id(self, node):
        """Return the entry stored for the identifier, or None after an error."""
        symbol = self.symbolTable.getIncludingParents(node.value)
        if symbol:
            return symbol
        self.error("undefined symbol: " + node.value, node.line)
        return None

    def visit_ParExpr(self, node):
        return node.expression.accept(self)

    def visit_BinExpr(self, node):
        """Return the result type of a binary expression, None if illegal."""
        operator = node.operator
        first = node.first.accept(self)
        second = node.second.accept(self)
        try:
            return self.ttype[operator][first][second]
        except (KeyError, TypeError):
            # str(): an operand type is None when its sub-expression failed
            self.error(
                "cannot compute operation: " + str(operator) +
                ",on arguments: " + str(first) + ", " + str(second),
                node.first.line)
            return None

    def visit_FunExpr(self, node):
        """Check the arguments of a call and return the function's type."""
        funSymbol = self.symbolTable.getIncludingParents(node.id.value)
        if not isinstance(funSymbol, FunSymbol):
            self.error("undefined function: " + node.id.value, node.line)
            return None
        for i, argument in enumerate(node.expressionList.list):
            try:
                baseArgType = funSymbol.argList[i]
                givenArgType = argument.accept(self)
                self.ttype['f'][baseArgType][givenArgType]
            except (KeyError, TypeError, IndexError):
                self.error("bad argument in funcall", node.line)
        return funSymbol.type

    def visit_ExprList(self, node):
        return [element.accept(self) for element in node.list]

    def visit_FunDefs(self, node):
        for element in node.list:
            element.accept(self)

    def visit_FunDef(self, node):
        """Register the function in the enclosing scope and check its body."""
        self.symbolTable = SymbolTable(self.symbolTable, node.id.value)
        # A real list: the arguments must be declared (visit_Arg) right now
        # and FunSymbol.argList is indexed later; a lazy map() does neither.
        argTypes = [arg.accept(self) for arg in node.argList.list]
        self.symbolTable.getParentScope().put(
            node.id.value, FunSymbol(node.type, node.id.value, argTypes))
        node.compoundInstr.accept(self)
        self.symbolTable = self.symbolTable.getParentScope()

    def visit_ArgList(self, node):
        return [element.accept(self) for element in node.list]

    def visit_Arg(self, node):
        """Declare the parameter in the current scope and return its type."""
        self.symbolTable.put(node.id.value, node.type)
        return node.type
Example #46
0
    def scan(self, filename):
        """Tokenize *filename* and write the scanner's result files.

        Every whitespace-separated chunk of a line is split further on each
        character that is not a letter, a digit or a double quote.  Each
        resulting token is classified as reserved word/separator, identifier
        or constant and recorded in the PIF; tokens that fit none of these
        are collected as lexical errors.

        Side effects: prints the tokens of every line, writes ``PIF.out``,
        ``ST_constants.out`` and ``ST_identifiers.out`` to the current
        directory, and finally prints either "Lexically correct." or the list
        of errors.  Line numbers in the output start at 0.

        :param filename: path of the source file to scan
        """
        symbolTableIdentifiers = SymbolTable(37)
        symbolTableConstants = SymbolTable(37)
        pif = PIF()
        errors = []

        regexForTokens = "[^a-zA-Z0-9\"]"
        # Operators that the character-level split tears apart and that have
        # to be glued back together: (pieces, merged token).
        compoundOperators = (
            (("=", "/", "="), "=/="),
            (("=", "="), "=="),
            (("<", "="), "<="),
            ((">", "="), ">="),
        )

        # the context manager closes the file even if reading fails
        with open(filename, 'r') as source:
            lines = source.readlines()

        for count, line in enumerate(lines):
            tokens = []
            for chunk in line.split():
                tokens.extend(re.split('(' + regexForTokens + ')', chunk))
            # drop the empty strings (and any None) produced by re.split
            tokens = [token for token in tokens if token]

            i = 0
            while i < len(tokens):
                j = i + 1
                for pieces, merged in compoundOperators:
                    if tuple(tokens[i:i + len(pieces)]) == pieces:
                        tokens[i] = merged
                        j = i + len(pieces)
                        break

                token = tokens[i]
                if token in self.__reservedWordsAndSeparators:
                    pif.genPIF(token, (0, 0))
                elif self.checkIfIdentifier(token):
                    pif.genPIF(self.__identifierCode,
                               symbolTableIdentifiers.position(token))
                elif (self.checkIfNumberConstant(token)
                      or self.checkIfStringConstant(token)):
                    pif.genPIF(self.__constantCode,
                               symbolTableConstants.position(token))
                else:
                    errors.append("error on line " + str(count) +
                                  " for token " + token)
                i = j

            print("Line{}: {}".format(count, tokens))

        with open('PIF.out', 'w') as f:
            print('PIF:\n', pif.getPIF(), file=f)

        with open('ST_constants.out', 'w') as f:
            print('ST_constants:\n', str(symbolTableConstants), file=f)

        with open('ST_identifiers.out', 'w') as f:
            print('ST_identifiers:\n', str(symbolTableIdentifiers), file=f)

        if not errors:
            print("Lexically correct.")
        else:
            print(errors)
Example #47
0
class TypeChecker(NodeVisitor):
    """AST visitor that prints semantic diagnostics (types, scopes, loops).

    Scopes are kept in ``self.symbol_table``; a new scope is pushed for the
    program, for every compound instruction and for every function
    definition.  Result types of binary operations are looked up in
    ``self.ttypes``.  All problems are reported with ``print``; nothing is
    raised.
    """

    def __init__(self):
        self.symbol_table = SymbolTable(None, "TypeChecker", {})
        # ttypes[operator][left type][right type] -> result type.
        # Combinations that were never registered read back as None.
        self.ttypes = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: None)))
        self.fill_ttypes()

    def fill_ttypes(self):
        """Register every supported (operator, left, right) -> result entry."""
        arithmetic = ('+', '-', '*', '/', '%')
        bitwise = ('&', '|', '^', '<<', '>>')
        # NOTE(review): relational operators are registered with the operand
        # type as their result (e.g. float == float -> 'float') -- confirm
        # that this is intended rather than a boolean/int result.
        relational = ('==', '!=', '<', '>', '<=', '>=')

        # int op int -> int
        for op in arithmetic + bitwise + relational:
            self.add_ttype(op, 'int', 'int', 'int')

        # any combination involving a float -> float
        float_operands = (('float', 'float'), ('int', 'float'), ('float', 'int'))
        for op in arithmetic + relational:
            for left, right in float_operands:
                self.add_ttype(op, left, right, 'float')

        # string operations
        self.add_ttype('+', 'string', 'string', 'string')
        self.add_ttype('*', 'string', 'int', 'string')
        for op in relational:
            self.add_ttype(op, 'string', 'string', 'string')

    def add_ttype(self, operation, operand1, operand2, returned):
        """Record that ``operand1 operation operand2`` yields ``returned``."""
        self.ttypes[operation][operand1][operand2] = returned

    def visit_Name(self, node):
        """Return the identifier text of the node."""
        return node.name

    def visit_CheckedName(self, node):
        """Return the identifier, reporting it first if it is not declared."""
        if not self.symbol_table.get(node.name):
            print("Error: Usage of undeclared variable '%s': line %s" % (node.name, node.lineno))
        return node.name

    def visit_Operator(self, node):
        """Return the operator text of the node."""
        return node.op

    def visit_Integer(self, node):
        return 'int'

    def visit_Float(self, node):
        return 'float'

    def visit_String(self, node):
        return 'string'

    def visit_Program(self, node):
        """Check every top-level item inside a fresh "Program" scope."""
        self.symbol_table = self.symbol_table.push_scope("Program")
        for item in node.body:
            self.visit(item)
        self.symbol_table = self.symbol_table.pop_scope()

    def visit_Declaration(self, node):
        """Declare each initialised name, reporting clashes, then check it."""
        for init in node.inits:
            name = self.visit(init.name)
            var = self.symbol_table.get_declared_var(name)
            if not var:
                self.symbol_table.put(name, VariableSymbol(init.name, node.var_type))
            elif isinstance(var, FunctionDefSymbol):
                print("Error: Function identifier '%s' used as a variable: line %d" % (name, node.lineno))
            else:
                print("Error: Variable '%s' already declared: line %d" % (name, node.lineno))
            self.visit(init)

    def visit_Initializer(self, node):
        """Compare the initialiser expression type with the declared type."""
        expression_ret_type = self.get_return_type(node.expression)
        declared_type = self.get_return_type(node.name)
        if expression_ret_type and declared_type:
            TypeChecker.check_type_consistency(node, declared_type, expression_ret_type)

    def visit_PrintInstr(self, node):
        for item in node.expr_list:
            self.visit(item)

    def visit_LabeledInstr(self, node):
        self.visit(node.instruction)

    def visit_Assignment(self, node):
        """Report an unknown target or a type mismatch for an assignment."""
        declared_type = self.get_return_type(node.target)
        expression_ret_type = self.get_return_type(node.value)
        if not declared_type:
            print("Error: Variable '%s' undefined in current scope: line %d" % (self.visit(node.target), node.lineno))
        elif expression_ret_type:
            TypeChecker.check_type_consistency(node, declared_type, expression_ret_type)

    def visit_IfInstr(self, node):
        self.visit(node.condition)
        self.visit(node.body)
        if node.else_body:
            self.visit(node.else_body)

    def visit_WhileInstr(self, node):
        """Check a while loop; its body is visited with the loop flag set."""
        self.visit(node.condition)
        # Remember the surrounding state so that leaving a nested loop does
        # not mark the enclosing loop as "not inside a loop".
        was_inside_loop = self.symbol_table.is_inside_loop()
        self.symbol_table.set_inside_loop(1)
        self.visit(node.body)
        self.symbol_table.set_inside_loop(1 if was_inside_loop else 0)

    def visit_RepeatInstr(self, node):
        """Check a repeat loop; body and condition see the loop flag set."""
        # Same save/restore as in visit_WhileInstr, for nested loops.
        was_inside_loop = self.symbol_table.is_inside_loop()
        self.symbol_table.set_inside_loop(1)
        for item in node.body:
            self.visit(item)
        self.visit(node.condition)
        self.symbol_table.set_inside_loop(1 if was_inside_loop else 0)

    def visit_ReturnInstr(self, node):
        """Check a return against the return type of the enclosing function."""
        ret_type = self.get_return_type(node.expression)

        # Walk outwards until the scope opened by visit_FunctionDef is found.
        scope = self.symbol_table
        while scope and scope.name != "FunctionDef":
            scope = scope.get_parent_scope()
        if not scope:
            print("Error: return instruction outside a function: line %s" % node.lineno)
            return

        fun_def = scope.get(scope.function_name)
        if not fun_def:
            # should not happen...
            print("something bad happened while parsing or checking")
            return

        function_ret_type = fun_def.type.name
        scope.set_return_present(1)
        if not (ret_type and function_ret_type):
            return
        if function_ret_type == 'int' and ret_type == 'float':
            print("Warning: Possible loss of precision: returning %s from function returning %s: line %s" %
                  (ret_type, function_ret_type, node.lineno))
        elif function_ret_type == 'float' and ret_type == 'int':
            pass  # int is silently widened to float
        elif ret_type != function_ret_type:
            print("Error: Improper returned type, expected %s, got %s: line %s" %
                  (function_ret_type, ret_type, node.lineno))

    def visit_ContinueInstr(self, node):
        if not self.symbol_table.is_inside_loop():
            print("Error: continue instruction outside a loop: line %s" % node.lineno)

    def visit_BreakInstr(self, node):
        if not self.symbol_table.is_inside_loop():
            print("Error: break instruction outside a loop: line %s" % node.lineno)

    def visit_CompoundInstr(self, node):
        """Check declarations, then instructions, in a fresh scope."""
        self.symbol_table = self.symbol_table.push_scope("CompoundInstr")
        for item in node.declarations:
            self.visit(item)
        for item in node.instructions:
            self.visit(item)
        self.symbol_table = self.symbol_table.pop_scope()

    def visit_BinaryExpr(self, node):
        """Return the result type of a binary expression, or None if illegal."""
        type_1 = self.get_return_type(node.left)
        # Only a plain name can be "undeclared".  Any other operand without a
        # type has already been reported while it was being evaluated, and
        # visiting it again would just repeat those diagnostics.
        if not type_1 and isinstance(node.left, AST.Name):
            print("Error: Usage of undeclared variable '%s': line %s" % (node.left.name, node.lineno))
        type_2 = self.get_return_type(node.right)
        if not type_2 and isinstance(node.right, AST.Name):
            print("Error: Usage of undeclared variable '%s': line %s" % (node.right.name, node.lineno))
        op = self.visit(node.op)
        ret = self.ttypes[op][type_1][type_2]
        if not ret:
            print("Error: Illegal operation, %s %s %s: line %s" % (type_1, op, type_2, node.left.lineno))
        return ret

    def visit_MethodCallExpr(self, node):
        """Check a call's arguments and return the callee's return type.

        Returns None (after reporting) when the name is not bound to a
        function definition.
        """
        name = self.visit(node.name)
        fun_def = self.symbol_table.get(name)
        # A name bound to a variable is not callable either; without this
        # guard the attribute accesses below would fail on such a symbol.
        if not isinstance(fun_def, FunctionDefSymbol):
            print("Error: Call of undefined function: '%s': line %s" % (name, node.lineno))
            return None

        if len(fun_def.args) != len(node.args):
            print("Error: Improper number of args in %s call: line %s" % (fun_def.name.name, node.lineno))
        else:
            for (fun_arg, call_arg) in zip(fun_def.args, node.args):
                fun_arg_type = fun_arg.arg_type.name
                call_arg_type = self.get_return_type(call_arg)
                if not (fun_arg_type and call_arg_type):
                    continue
                if fun_arg_type == 'int' and call_arg_type == 'float':
                    print("Warning: Possible loss of precision: passing %s instead of %s: line %s" %
                          (call_arg_type, fun_arg_type, node.lineno))
                elif fun_arg_type == 'float' and call_arg_type == 'int':
                    pass  # int is silently widened to float
                elif call_arg_type != fun_arg_type:
                    print("Error: Improper type of args in %s call: line %s" % (name, node.lineno))
                    break  # one report per call is enough
        return fun_def.type.name

    def visit_FunctionDef(self, node):
        """Register a function, then check its arguments and body in a new scope."""
        name = self.visit(node.name)
        if self.symbol_table.get(name):
            print("Error: Redefinition of function '%s': line %s" % (name, node.lineno))
        self.symbol_table.put(name, FunctionDefSymbol(node.name, node.return_type, node.args))
        self.symbol_table = self.symbol_table.push_scope("FunctionDef")
        # The symbol is also stored in the function's own scope, where
        # visit_ReturnInstr looks it up through ``function_name``.
        self.symbol_table.put(name, FunctionDefSymbol(node.name, node.return_type, node.args))
        self.symbol_table.function_name = name
        for arg in node.args:
            self.visit(arg)
        self.symbol_table.set_return_present(0)
        self.visit(node.body)
        if not self.symbol_table.get_return_present():
            print("Error: Missing return statement in function '%s' returning %s: line %s" %
                  (name, self.visit(node.return_type), node.lineno))
        self.symbol_table = self.symbol_table.pop_scope()

    def visit_Argument(self, node):
        """Declare a formal argument in the enclosing function scope."""
        name = self.visit(node.name)
        if self.symbol_table.name == "FunctionDef":
            self.symbol_table.put(name, VariableSymbol(node.name, node.arg_type))

    def get_return_type(self, node):
        """Return the type name of ``node``.

        A plain ``AST.Name`` is resolved through the symbol table (None when
        it is unknown); any other node is visited and its result returned.
        """
        if isinstance(node, AST.Name):
            var = self.symbol_table.get(node.name)
            return None if not var else var.type.name
        return self.visit(node)

    @staticmethod
    def check_type_consistency(node, declared_type, expression_ret_type):
        """Report narrowing (warning) or incompatible (error) assignments."""
        if declared_type == 'int' and expression_ret_type == 'float':
            print("Warning: Possible loss of precision: assignment of %s to %s: line %s" %
                  (expression_ret_type, declared_type, node.lineno))
        elif declared_type == 'float' and expression_ret_type == 'int':
            pass  # int is silently widened to float
        elif declared_type != expression_ret_type:
            print("Error: Assignment of %s to %s: line %s" % (expression_ret_type, declared_type, node.lineno))
class CompilationEngine:
    """Recursive-descent compiler that turns a token stream into VM commands.

    Tokens are read from ``tokenizer``; identifiers are recorded in a
    ``SymbolTable``; the generated commands are emitted through
    ``vm_writer``.  After construction the next routine called must be
    ``compileClass()``.
    """

    #token types as reported by tokenizer.tokenType()
    KEYWORD = 'KEYWORD'
    SYMBOL = 'SYMBOL'
    IDENTIFIER = 'IDENTIFIER'
    INT_CONST = 'INT_CONST'
    STRING_CONST = 'STRING_CONST'

    #keywords
    CLASS = 'class'
    METHOD = 'method'
    FUNCTION = 'function'
    CONSTRUCTOR = 'constructor'
    INT = 'int'
    BOOLEAN = 'boolean'
    CHAR = 'char'
    VOID = 'void'
    VAR = 'var'
    STATIC = 'static'
    FIELD = 'field'
    LET = 'let'
    DO = 'do'
    IF = 'if'
    ELSE = 'else'
    WHILE = 'while'
    RETURN = 'return'
    TRUE = 'true'
    FALSE = 'false'
    NULL = 'null'
    THIS = 'this'

    #symbols
    SEMICOLAN = ';'
    L_PARENTHESES = '('
    R_PARENTHESES = ')'
    L_BRACKET = '['
    R_BRACKET = ']'

    #binary operators accepted between two terms of an expression
    op_set = {
        '+', '-', '*', '/', '&', '|', '<', '>', '=', '&lt;', '&gt;', '&amp;'
    }

    def __init__(self, tokenizer, output_stream, vm_writer):
        """Store the collaborators and move to the first token.

        ``output_stream`` is accepted for interface compatibility but is not
        used; all output goes through ``vm_writer``.
        """
        print('CompilationEngine is initializing')
        self.tokenizer = tokenizer
        self.symboltable = SymbolTable()
        self.vm_writer = vm_writer
        self.label = 0  #counter behind produceLabel()

        #goes to the first token in the stream
        self.tokenizer.advance()

    #---------------------------------------------------------------- helpers

    def _at_symbol(self, *symbols):
        """Return True when the current token is one of the given symbols."""
        return (self.tokenizer.tokenType() == self.SYMBOL
                and self.tokenizer.symbol() in symbols)

    def _at_keyword(self, keywords):
        """Return True when the current token is one of the given keywords."""
        return (self.tokenizer.tokenType() == self.KEYWORD
                and self.tokenizer.keyWord() in keywords)

    def _segment_of(self, name):
        """Return the VM segment of a variable ('field' maps to 'this')."""
        segment = self.symboltable.kindOf(name)
        if segment == 'field':
            segment = 'this'
        return segment

    def _push_variable(self, name):
        """Emit a push of the variable ``name`` from its segment."""
        segment = self._segment_of(name)
        self.vm_writer.writePush(segment, self.symboltable.indexOf(name))

    def produceLabel(self):
        """Return a fresh label name of the form ``LABEL_<n>``."""
        label = 'LABEL_' + str(self.label)
        self.label = self.label + 1
        return label

    def eat_token_type(self, tok_type):
        """Consume and return the current token if it has type ``tok_type``.

        Raises Exception when the current token has a different type.
        """
        tok = self.tokenizer.current_token
        if self.tokenizer.tokenType() != tok_type:
            #str() so the message can be built whatever the token value is
            raise Exception('Expected a token of type ' + tok_type +
                            ' but found ' + str(tok))
        self.tokenizer.advance()
        return tok

    def eat_token(self, tok_type, valid_set):
        """Consume and return the current token if its type is ``tok_type``
        and its text is a member of ``valid_set``.

        Raises Exception otherwise.
        """
        tok = self.tokenizer.current_token
        if self.tokenizer.tokenType() == tok_type and tok in valid_set:
            self.tokenizer.advance()
            return tok
        #valid_set is a collection, so it has to be rendered explicitly;
        #sorting keeps the message stable between runs
        expected = ', '.join(sorted(str(item) for item in valid_set))
        raise Exception('Expected a token of type ' + tok_type +
                        ' from the set: {' + expected + '} but found ' +
                        str(tok))

    #------------------------------------------------------ program structure

    def compileClass(self):
        """Compile a whole class and close the VM writer afterwards."""
        self.eat_token(self.KEYWORD, {self.CLASS})
        self.tokenizer.class_name = self.eat_token_type(
            self.IDENTIFIER)  #className
        self.eat_token(self.SYMBOL, {'{'})
        #classVarDec*
        while self._at_keyword({'static', 'field'}):
            self.compileClassVarDec()
        #subroutineDec*
        while self._at_keyword({'constructor', 'function', 'method'}):
            self.compileSubroutine()

        self.eat_token(self.SYMBOL, {'}'})
        self.vm_writer.close()
        return

    def compileVoidOrType(self):
        """Consume a return type: int, char, boolean, void or a class name."""
        if self._at_keyword({'int', 'char', 'boolean', 'void'}):
            self.tokenizer.advance()
        elif self.tokenizer.tokenType() == self.IDENTIFIER:
            self.tokenizer.advance()
        else:
            raise Exception('expected int, char, boolean, or className')

    def compileType(self):
        """Consume a variable type and return its token text."""
        curr_tok = self.tokenizer.current_token
        tok_type = self.tokenizer.tokenType()
        is_primitive = (tok_type == self.KEYWORD
                        and curr_tok in {self.INT, self.CHAR, self.BOOLEAN})
        if not is_primitive and tok_type != self.IDENTIFIER:
            raise Exception('expected int, char, boolean, or className')
        self.tokenizer.advance()
        return curr_tok

    def compileClassVarDec(self):
        """Compile a static/field declaration and define every listed name."""
        identifier_kind = self.eat_token(self.KEYWORD,
                                         {self.STATIC, self.FIELD})
        identifier_type = self.compileType()
        identifier_name = self.eat_token_type(self.IDENTIFIER)
        self.symboltable.define(identifier_name, identifier_type,
                                identifier_kind, False)

        #further names separated by commas share kind and type
        while self._at_symbol(','):
            self.tokenizer.advance()
            identifier_name = self.eat_token_type(self.IDENTIFIER)
            self.symboltable.define(identifier_name, identifier_type,
                                    identifier_kind, False)

        self.eat_token(self.SYMBOL, {';'})
        return

    def compileSubroutine(self):
        """Compile one constructor, function or method."""
        #reset subrountine symbol table
        self.symboltable.startSubroutine()

        subroutine_type = self.eat_token(
            self.KEYWORD, {self.CONSTRUCTOR, self.FUNCTION, self.METHOD})
        if subroutine_type == self.METHOD:
            #a method receives the object as its first argument
            self.symboltable.define('this', self.tokenizer.class_name,
                                    'argument', True)

        self.compileVoidOrType()  #('void' | type)

        subroutine_identifier = self.eat_token_type(
            self.IDENTIFIER)  # subroutineName:identifier

        self.eat_token(self.SYMBOL, {'('})
        self.compileParameterList()  #parameterList:
        self.eat_token(self.SYMBOL, {')'})

        #subroutineBody
        self.eat_token(self.SYMBOL, {'{'})

        #varDec*
        while self._at_keyword({self.VAR}):
            self.compileVarDec()

        #the number of locals is only known now, so the declaration is
        #written after the varDecs have been read
        self.vm_writer.writeFunction(
            self.tokenizer.class_name + '.' + subroutine_identifier,
            self.symboltable.count_map['local'])

        if subroutine_type == self.CONSTRUCTOR:
            #allocate one word per field and make the new block 'this'
            self.vm_writer.writePush('constant',
                                     self.symboltable.count_map['field'])
            self.vm_writer.writeCall('Memory.alloc', 1)
            self.vm_writer.writePop('pointer', 0)

        if subroutine_type == self.METHOD:
            #argument 0 holds the object the method was called on
            self.vm_writer.writePush('argument', 0)
            self.vm_writer.writePop('pointer', 0)

        self.compileStatements()
        self.eat_token(self.SYMBOL, {'}'})
        return

    def compileParameterList(self):
        """Compile a possibly empty parameter list, defining each argument."""
        #perhaps empty
        if self._at_symbol(')'):
            return

        identifier_type = self.compileType()
        identifier_name = self.eat_token_type(self.IDENTIFIER)
        self.symboltable.define(identifier_name, identifier_type, 'argument',
                                True)

        while self._at_symbol(','):
            self.tokenizer.advance()
            identifier_type = self.compileType()
            identifier_name = self.eat_token_type(self.IDENTIFIER)
            self.symboltable.define(identifier_name, identifier_type,
                                    'argument', True)
        return

    def compileVarDec(self):
        """Compile a ``var`` declaration, defining each name as a local."""
        self.eat_token(self.KEYWORD, {self.VAR})  #var
        identifier_kind = 'local'
        identifier_type = self.compileType()

        identifier_name = self.eat_token_type(self.IDENTIFIER)  #varName
        self.symboltable.define(identifier_name, identifier_type,
                                identifier_kind, True)

        while self._at_symbol(','):
            self.tokenizer.advance()
            identifier_name = self.eat_token_type(self.IDENTIFIER)
            self.symboltable.define(identifier_name, identifier_type,
                                    identifier_kind, True)

        self.eat_token(self.SYMBOL, {';'})
        return

    #------------------------------------------------------------- statements

    def compileStatements(self):
        """Compile statements until the current token no longer starts one."""
        handlers = {
            self.LET: self.compileLet,
            self.IF: self.compileIf,
            self.WHILE: self.compileWhile,
            self.DO: self.compileDo,
            self.RETURN: self.compileReturn,
        }
        while self._at_keyword(handlers):
            handlers[self.tokenizer.current_token]()
        return

    def compileDo(self):
        """Compile a ``do`` statement; the call's result is discarded."""
        self.eat_token(self.KEYWORD, {self.DO})
        initial_identifier = self.eat_token_type(self.IDENTIFIER)
        is_method_call = False

        if self.tokenizer.current_token not in {'.', '('}:
            raise Exception(
                'Excepted either a period or a left parentheses but instead: '
                + self.tokenizer.current_token)

        # varName|className.subroutine()
        if self._at_symbol('.'):
            self.tokenizer.advance()
            dotId = self.eat_token_type(
                self.IDENTIFIER)  #subroutineName:identifier
            #a known variable means a method call on that object;
            #anything else is taken as a class name
            if self.symboltable.contains(initial_identifier):
                is_method_call = True
                self._push_variable(initial_identifier)
                full_call_identifier = self.symboltable.typeOf(
                    initial_identifier) + '.' + dotId
            else:
                full_call_identifier = initial_identifier + '.' + dotId

        #subroutine(): a method of the current object
        elif self._at_symbol('('):
            is_method_call = True
            self.vm_writer.writePush('pointer', 0)
            full_call_identifier = self.tokenizer.class_name + '.' + initial_identifier

        self.eat_token(self.SYMBOL, {'('})
        n_parameters = self.compileExpressionList()  #expressionList
        self.eat_token(self.SYMBOL, {')'})
        #add the extra argument for the method call
        if is_method_call:
            n_parameters = n_parameters + 1

        self.eat_token(self.SYMBOL, {';'})
        self.vm_writer.writeCall(full_call_identifier, n_parameters)
        #throw the returned value away
        self.vm_writer.writePop('temp', 0)
        return

    def compileLet(self):
        """Compile a ``let`` statement for a variable or an array element."""
        self.eat_token(self.KEYWORD, {self.LET})
        lhs_var = self.eat_token_type(self.IDENTIFIER)  #varName
        is_array = False

        #array indexing into the variable: leave base + index on the stack
        if self._at_symbol('['):
            is_array = True
            self.tokenizer.advance()
            self._push_variable(lhs_var)
            self.compileExpression()
            self.vm_writer.writeArithmetic('+', True)
            self.eat_token(self.SYMBOL, {']'})

        self.eat_token(self.SYMBOL, {'='})
        self.compileExpression()
        self.eat_token(self.SYMBOL, {';'})

        if is_array:
            #park the value in temp 0 while the target address is moved
            #into pointer 1, then store the value through 'that'
            self.vm_writer.writePop('temp', 0)
            self.vm_writer.writePop('pointer', 1)
            self.vm_writer.writePush('temp', 0)
            self.vm_writer.writePop('that', 0)
        else:
            self.vm_writer.writePop(self._segment_of(lhs_var),
                                    self.symboltable.indexOf(lhs_var))
        return

    def compileWhile(self):
        """Compile a ``while`` loop."""
        self.eat_token(self.KEYWORD, {self.WHILE})

        loop_start = self.produceLabel()
        loop_end = self.produceLabel()

        self.vm_writer.writeLabel(loop_start)
        self.eat_token(self.SYMBOL, {'('})
        self.compileExpression()
        self.eat_token(self.SYMBOL, {')'})

        #negate the condition: leave the loop when it does not hold
        self.vm_writer.writeArithmetic('~', False)
        self.vm_writer.writeIf(loop_end)

        self.eat_token(self.SYMBOL, {'{'})
        self.compileStatements()
        self.eat_token(self.SYMBOL, {'}'})

        self.vm_writer.writeGoto(loop_start)
        self.vm_writer.writeLabel(loop_end)
        return

    def compileReturn(self):
        """Compile a ``return``; without an expression, constant 0 is pushed."""
        self.eat_token(self.KEYWORD, {self.RETURN})

        if self.tokenizer.current_token != ';':  # expression?
            self.compileExpression()
            self.eat_token(self.SYMBOL, {';'})
        else:  #no expression
            self.eat_token(self.SYMBOL, {';'})
            self.vm_writer.writePush('constant', 0)

        self.vm_writer.writeReturn()
        return

    def compileIf(self):
        """Compile an ``if`` statement with an optional ``else`` branch."""
        self.eat_token(self.KEYWORD, {self.IF})

        else_label = self.produceLabel()
        end_label = self.produceLabel()  #might not be used if there is no else

        self.eat_token(self.SYMBOL, {'('})
        self.compileExpression()  #expression
        self.eat_token(self.SYMBOL, {')'})

        #negate the condition: skip the 'then' branch when it does not hold
        self.vm_writer.writeArithmetic('~', False)
        self.vm_writer.writeIf(else_label)

        self.eat_token(self.SYMBOL, {'{'})
        self.compileStatements()
        self.eat_token(self.SYMBOL, {'}'})

        #maybe else
        if self._at_keyword({self.ELSE}):
            self.vm_writer.writeGoto(end_label)
            self.vm_writer.writeLabel(else_label)
            self.tokenizer.advance()
            self.eat_token(self.SYMBOL, {'{'})
            self.compileStatements()
            self.eat_token(self.SYMBOL, {'}'})
            self.vm_writer.writeLabel(end_label)
        else:
            self.vm_writer.writeLabel(else_label)
        return

    #------------------------------------------------------------ expressions

    def compileExpression(self):
        """Compile ``term (op term)*``; each operator is emitted after its
        right-hand term."""
        self.compileTerm()

        while self._at_symbol(*self.op_set):
            operator = self.tokenizer.current_token
            self.tokenizer.advance()
            self.compileTerm()
            self.vm_writer.writeArithmetic(operator, True)
        return

    def compileTerm(self):
        """Compile a single term.

        Raises Exception when the current token cannot start a term.
        """
        #integerConstant
        if self.tokenizer.tokenType() == self.INT_CONST:
            self.vm_writer.writePush('constant', self.tokenizer.current_token)
            self.tokenizer.advance()
            return

        #stringConstant: build the string one character at a time
        if self.tokenizer.tokenType() == self.STRING_CONST:
            tok = self.tokenizer.current_token
            self.vm_writer.writePush('constant', len(tok))
            self.vm_writer.writeCall('String.new', 1)

            for char in tok:
                self.vm_writer.writePush('constant', ord(char))
                self.vm_writer.writeCall('String.appendChar', 2)

            self.tokenizer.advance()
            return

        #keywordConstant
        if self._at_keyword({self.TRUE, self.FALSE, self.NULL, self.THIS}):
            keyword_constant = self.tokenizer.current_token

            if keyword_constant == self.TRUE:
                #true is emitted as constant 1 followed by a unary '-'
                self.vm_writer.writePush('constant', 1)
                self.vm_writer.writeArithmetic('-', False)
            elif keyword_constant in {self.FALSE, self.NULL}:
                self.vm_writer.writePush('constant', 0)
            elif keyword_constant == self.THIS:
                self.vm_writer.writePush('pointer', 0)

            self.tokenizer.advance()
            return

        #varName | varName[expression] | subroutineName() | className.subroutine() | varName.subrountine()
        if self.tokenizer.tokenType() == self.IDENTIFIER:
            main_identifier = self.tokenizer.current_token
            self.tokenizer.advance()

            #varName[expression]
            if self._at_symbol('['):
                self.tokenizer.advance()
                #push the array variable onto the stack
                self._push_variable(main_identifier)
                self.compileExpression()
                self.eat_token(self.SYMBOL, {']'})  #close the array
                #base + index -> pointer 1, then read through 'that'
                self.vm_writer.writeArithmetic('+', True)
                self.vm_writer.writePop('pointer', 1)
                self.vm_writer.writePush('that', 0)
                return

            #subrountineName(): a method of the current object, compiled
            #the same way compileDo compiles it - 'this' is pushed as the
            #hidden first argument and the name is qualified with the class
            if self._at_symbol('('):
                self.tokenizer.advance()
                self.vm_writer.writePush('pointer', 0)
                n_parameters = self.compileExpressionList()
                self.eat_token(self.SYMBOL, {')'})
                self.vm_writer.writeCall(
                    self.tokenizer.class_name + '.' + main_identifier,
                    n_parameters + 1)
                return

            #className|varName.subroutine()
            if self._at_symbol('.'):
                self.tokenizer.advance()

                is_var_name = self.symboltable.contains(main_identifier)
                dotId = self.eat_token_type(self.IDENTIFIER)

                if is_var_name:
                    #method call on an object: push it, call on its type
                    self._push_variable(main_identifier)
                    main_identifier = self.symboltable.typeOf(
                        main_identifier) + '.' + dotId
                else:
                    main_identifier = main_identifier + '.' + dotId

                self.eat_token(self.SYMBOL, {'('})
                n_parameters = self.compileExpressionList()
                self.eat_token(self.SYMBOL, {')'})
                #the object itself is the extra argument of a method call
                if is_var_name:
                    n_parameters = n_parameters + 1
                self.vm_writer.writeCall(main_identifier, n_parameters)
                return

            #plain variable
            self._push_variable(main_identifier)
            return

        #(expression)
        if self._at_symbol('('):
            self.tokenizer.advance()
            self.compileExpression()
            self.eat_token(self.SYMBOL, {')'})
            return

        #unary op: the operand is compiled first, then the operator
        if self._at_symbol('-', '~'):
            operator = self.tokenizer.current_token
            self.tokenizer.advance()
            self.compileTerm()
            self.vm_writer.writeArithmetic(operator, False)
            return

        #if we got here, raise exception
        raise Exception('end of the term and nothing was found')

    def compileExpressionList(self):
        """Compile a possibly empty comma-separated list of expressions and
        return how many were compiled."""
        num_expressions = 0

        if self._at_symbol(')'):
            return num_expressions

        self.compileExpression()
        num_expressions = num_expressions + 1

        while self._at_symbol(','):
            self.tokenizer.advance()
            self.compileExpression()
            num_expressions = num_expressions + 1

        return num_expressions
Example #49
0
class TypeChecker(NodeVisitor):
    """AST visitor that type-checks expressions, assignments and loops.

    Problems are reported with ``print`` and recorded by setting
    ``errorOccured`` to True.
    """

    # Flag flipped to True by the visit methods once an error is reported.
    errorOccured = False
    # Defined on the class, so the same SymbolTable object is shared by all
    # instances of the checker.
    symbolTable = SymbolTable()
    # Sentinel "types" returned for an inconsistent matrix literal and for an
    # out-of-range matrix reference.
    WRONG_MATRIX = "wrong_matrix"
    WRONG_REF = "wrong_ref"

    # returnedType[left][right][operator] -> result type of the arithmetic
    # operation, or 'err' when the combination is not allowed.
    # Every combination starts as 'err'; the allowed ones are filled in below.
    returnedType = {'int': {}, 'float': {}, 'string': {}}
    for i in returnedType.keys():
        returnedType[i] = {}
        for j in returnedType.keys():
            returnedType[i][j] = {}
            for k in ['+', '-', '/', '*', '%']:
                returnedType[i][j][k] = 'err'

    # Addition (also string concatenation).
    returnedType['int']['float']['+'] = 'float'
    returnedType['int']['int']['+'] = 'int'
    returnedType['float']['float']['+'] = 'float'
    returnedType['float']['int']['+'] = 'float'
    returnedType['string']['string']['+'] = 'string'
    # Subtraction.
    returnedType['int']['float']['-'] = 'float'
    returnedType['int']['int']['-'] = 'int'
    returnedType['float']['float']['-'] = 'float'
    returnedType['float']['int']['-'] = 'float'
    # Multiplication (string * int is allowed and yields a string).
    returnedType['int']['float']['*'] = 'float'
    returnedType['int']['int']['*'] = 'int'
    returnedType['float']['float']['*'] = 'float'
    returnedType['float']['int']['*'] = 'float'
    returnedType['string']['int']['*'] = 'string'
    # Division.
    returnedType['int']['float']['/'] = 'float'
    returnedType['int']['int']['/'] = 'int'
    returnedType['float']['float']['/'] = 'float'
    returnedType['float']['int']['/'] = 'float'
    # Modulo is defined for int operands only.
    returnedType['int']['int']['%'] = 'int'

    # returnedTypeRelative[left][right] -> result type of a comparison,
    # 'int' for every allowed pairing and 'err' otherwise.
    returnedTypeRelative = {'int': {}, 'float': {}, 'string': {}}
    for i in returnedTypeRelative.keys():
        returnedTypeRelative[i] = {}
        for j in returnedTypeRelative.keys():
            returnedTypeRelative[i][j] = 'err'

    returnedTypeRelative['int']['float'] = 'int'
    returnedTypeRelative['int']['int'] = 'int'
    returnedTypeRelative['float']['float'] = 'int'
    returnedTypeRelative['float']['int'] = 'int'
    returnedTypeRelative['string']['string'] = 'int'

    def visit_Program(self, node):
        """Check a whole program by visiting its instruction list."""
        self.visit(node.insts)

    def visit_Instructions(self, node):
        """Visit every instruction of the list, in order."""
        for instruction in node.instrs:
            self.visit(instruction)

    def visit_Print(self, node):
        """Visit the printed expression(s) and walk over the resulting types.

        Reporting of non-printable types is switched off, so the walk has no
        effect beyond requiring the visit result to be iterable.
        """
        printable = ('int', 'float', 'string')
        for printed_type in self.visit(node.expr):
            if printed_type in printable:
                continue
            # Non-printable type: the diagnostic for this case is
            # intentionally disabled, nothing is reported.

    def visit_Assignment(self, node):
        """Type-check a plain (``=``) or compound (``+=``, ``-=``, ...) assignment.

        Plain assignment to a variable records the expression type in the
        symbol table; plain assignment through a matrix reference requires
        the referenced type and the expression type to match.

        Compound assignment requires the target to be already assigned.
        Primitive operand types are validated against ``returnedType``;
        any other types (e.g. matrix types) must simply be identical, which
        avoids the ``KeyError`` the table lookup used to raise for them.
        """
        target_type = self.visit(node.id)
        # Only the first character is kept: '=' for a plain assignment,
        # otherwise the arithmetic operator of a compound assignment.
        oper = self.visit(node.oper)[0]
        expr_type = self.visit(node.expr)

        if oper == '=':
            if isinstance(node.id, AST.Ref):
                # A WRONG_REF target has already been reported by visit_Ref.
                if target_type != self.WRONG_REF and target_type != expr_type:
                    print(
                        'Error: MISMATCH in types in matrix reference assignment: left:',
                        target_type, 'right:', expr_type)
                    self.errorOccured = True
            elif expr_type == self.WRONG_MATRIX:
                # visit_Vector printed the error without a newline; finish
                # that line with the name of the assignment target.
                print(" - ", node.id)
            else:
                self.symbolTable.put(str(node.id), expr_type)
            return

        if target_type == "none":
            print("Error:", node.id, oper, "\b=", node.expr,
                  "\tused without previous assignment to variable",
                  node.id)
            self.errorOccured = True
            return

        primitives = ('int', 'float', 'string')
        if target_type in primitives and expr_type in primitives:
            result = self.returnedType[target_type][expr_type].get(oper, 'err')
            mismatch = result == 'err'
        else:
            # The operator table only knows primitive types.
            mismatch = target_type != expr_type

        if mismatch:
            print("Error: MISMATCH TYPE in", node.id, oper, "\b=",
                  expr_type, "\b. Previous type was", target_type)
            self.errorOccured = True

    def visit_Ref(self, node):
        """Type-check an indexed reference into a matrix variable.

        The variable's type string is split on 'x'; every part except the
        last is read as a dimension size and the last as the element type.

        Returns:
            ``WRONG_REF`` (after reporting an error) when more indices are
            given than there are dimensions or when an index is not smaller
            than its dimension size; otherwise the type string left after
            dropping one leading dimension per index.
        """
        ref_type = self.visit(node.id)

        type_parts = re.sub("[x]", " ", ref_type).split()
        dimension_parts = list(type_parts)
        dimension_parts.pop()  # drop the element type
        id_sizes = [int(part) for part in dimension_parts]

        ref_sizes = [index.value for index in node.vector.exprs]

        # The second test is only evaluated when the first one is False, so
        # zip() always pairs every index with its dimension.
        out_of_range = len(id_sizes) < len(ref_sizes) or any(
            index >= size for index, size in zip(ref_sizes, id_sizes))
        if out_of_range:
            print('Error:\tmatrix wrong reference. Real size:', id_sizes,
                  'while referenced to:', ref_sizes)
            self.errorOccured = True
            return self.WRONG_REF

        # Each index consumes one leading dimension of the type.
        return 'x'.join(type_parts[len(ref_sizes):])

    def visit_Assign_operator(self, node):
        """Return the operator stored on the node (``node.oper``)."""
        return node.oper

    def visit_Vector(self, node):
        """Compute the type of a vector/matrix literal.

        All elements must have the same type as the first one.

        Returns:
            ``"<number of elements>x<element type>"`` for a consistent
            literal, or ``WRONG_MATRIX`` after reporting an error.
        """
        element_types = self.visit(node.expressions)
        first_type = element_types[0]

        if any(current != first_type for current in element_types):
            # No trailing newline: visit_Assignment completes the line.
            print("Error: Incompatible types in matrix:",
                  element_types,
                  end='')
            self.errorOccured = True
            return self.WRONG_MATRIX

        element_count = len(node.expressions.exprs)
        return str(element_count) + 'x' + first_type

    def visit_Expressions(self, node):
        """Return the list of visit results for each expression, in order."""
        return [self.visit(expression) for expression in node.exprs]

    def visit_Choice(self, node):
        """Check an if/else: condition and branches are visited in a new scope."""
        self.symbolTable.pushScope()
        self.visit(node.cond)
        self.visit(node.inst1)
        # The else branch is optional.
        if node.inst2 is not None:
            self.visit(node.inst2)
        self.symbolTable.popScope()

    def visit_While(self, node):
        """Check a while loop: new scope, body visited with the loop marker pushed."""
        self.symbolTable.pushScope()
        self.visit(node.cond)
        # The loop marker is only active while the body is visited;
        # visit_Break / visit_Continue read symbolTable.loop.
        self.symbolTable.pushLoop()
        self.visit(node.stmt)
        self.symbolTable.popLoop()
        self.symbolTable.popScope()

    def visit_For(self, node):
        """Check a for loop: the loop variable is registered as 'int' in a new scope."""
        self.symbolTable.pushScope()
        self.symbolTable.put(str(node.id), 'int')
        self.visit(node.range)
        # The loop marker is only active while the body is visited.
        self.symbolTable.pushLoop()
        self.visit(node.inst)
        self.symbolTable.popLoop()
        self.symbolTable.popScope()

    def visit_Range(self, node):
        """Check that both range bounds are 'int' (or unknown to the symbol table).

        Each bound is looked up in the symbol table; a result other than
        'none' or 'int' is reported as an error.
        """

        def check_bound(bound, message):
            bound_type = self.symbolTable.get(bound)
            if bound_type not in ('none', 'int'):
                print(message)
                self.errorOccured = True

        check_bound(node.range_from, "Range from should evaluate to int")
        check_bound(node.range_to, "Range to should evaluate to int")

    def visit_Return(self, node):
        """Visit the returned expression."""
        self.visit(node.ret)

    def visit_Continue(self, node):
        """Report an error when ``continue`` appears with no active loop."""
        if self.symbolTable.loop <= 0:
            print("Continue used outside loop")
            self.errorOccured = True

    def visit_Break(self, node):
        """Report an error when ``break`` appears with no active loop."""
        if self.symbolTable.loop <= 0:
            print("Break used outside loop")
            self.errorOccured = True

    def visit_ComInstructions(self, node):
        """Visit a compound instruction block inside its own scope."""
        self.symbolTable.pushScope()
        self.visit(node.instrs)
        self.symbolTable.popScope()

    def visit_Const(self, node):
        """Return the type name of a constant node.

        Returns:
            'string', 'int' or 'float' according to the exact Python type of
            ``node.value``; ``None`` for any other type.
        """
        type_names = {str: 'string', int: 'int', float: 'float'}
        # Exact type match on purpose (subclasses such as bool are excluded).
        return type_names.get(type(node.value))

    def visit_BinExpr(self, node):
        """Type-check a binary arithmetic expression.

        Returns:
            * for primitive operands, the result type from ``returnedType``
              ('err' after reporting an error for a forbidden combination);
            * for non-primitive operands (e.g. matrix types), the shared
              operand type when both sides are identical, otherwise
              'wrong_bin_expr' after reporting an error.
        """
        type1 = self.visit(node.left)
        type2 = self.visit(node.right)
        op = node.op
        primitives = ('int', 'float', 'string')

        if type1 not in primitives or type2 not in primitives:
            if type1 != type2:
                print("Error: TYPE MISMATCH IN BIN EXPR", type1, op, type2,
                      " here: ", node.left, op, node.right)
                self.errorOccured = True
                return 'wrong_bin_expr'
            # Identical non-primitive operands: the operator table has no
            # entry for them (the lookup used to raise KeyError), the
            # result keeps the operands' type.
            return type1

        # An operator missing from the table is treated as not allowed.
        result_type = self.returnedType[type1][type2].get(op, 'err')
        if result_type == 'err':
            print("Error: TYPE MISMATCH IN BIN EXPR", type1, op, type2,
                  " here: ", node.left, op, node.right)
            self.errorOccured = True
        return result_type

    def visit_Condition(self, node):
        """Type-check a comparison used as a condition.

        Operand types that are not present in ``returnedTypeRelative``
        (matrix types, error sentinels, 'none', ...) are reported as a
        mismatch instead of raising ``KeyError``.
        """
        type1 = self.visit(node.left)
        type2 = self.visit(node.right)

        comparable = ('int', 'float', 'string')
        if type1 in comparable and type2 in comparable:
            result_type = self.returnedTypeRelative[type1][type2]
        else:
            result_type = 'err'

        if result_type == 'err':
            print("Error: TYPE MISMATCH IN CONDITION:", type1, node.op, type2,
                  'here:', node.left, node.op, node.right)
            self.errorOccured = True
        if result_type != 'int':
            print("Error: CONDITION MUST BE INT")
            self.errorOccured = True

    def visit_Variable(self, node):
        """Return whatever the symbol table holds for the variable's name."""
        return self.symbolTable.get(str(node.ID))

    def visit_Matrix_operation(self, node):
        """Require both operands of an element-wise matrix operation to have the same type."""
        type1 = self.symbolTable.get(str(node.matrix1))
        type2 = self.symbolTable.get(str(node.matrix2))
        if type1 != type2:
            print('Error: matrix operation on incompatible types:', type1,
                  type2, 'here:', node.matrix1, node.dot_oper, node.matrix2)
            self.errorOccured = True

    def visit_Dot_operation(self, node):
        """Return the operator stored on the node (``node.dot_oper``)."""
        return node.dot_oper

    def visit_Matrix(self, node):
        """Visit the wrapped matrix node; the result is discarded."""
        self.visit(node.matrix)

    def visit_Matrix_transposed(self, node):
        """Visit the transposed operand; the result is discarded."""
        self.visit(node.id)

    def visit_Minus_transposed(self, node):
        """Visit the operand of the node; the result is discarded."""
        self.visit(node.id)

    def visit_Matrix_function(self, node):
        """Build the type string of a matrix created by a matrix function.

        Returns:
            ``str()`` of every argument expression, each followed by 'x',
            with 'int' appended as the element type.
        """
        dimensions = ''.join(
            str(dimension) + 'x' for dimension in node.arg.exprs)
        return dimensions + 'int'
Example #50
0
class SemanticAnalyser(HelloVisitor):
    """Parse-tree visitor performing scope and type checks.

    The attributes below are defined on the class and are therefore shared
    by all instances until an instance rebinds them.
    """
    # Symbol table of the scope currently being analysed; starts as a scope
    # without parent.
    current_symbol_table = SymbolTable(parent=None)
    # NOTE: bound to the TypeTable class itself (not an instance), so the
    # assignments below write directly into TypeTable.table.
    type_table = TypeTable
    # Ids 1-3 are presumably the built-in primitive types -- TODO confirm
    # against PrimitiveType.
    type_table.table[1] = PrimitiveType()
    type_table.table[2] = PrimitiveType()
    type_table.table[3] = PrimitiveType()

    @staticmethod
    def unicode_to_str(unicode_str):
        """
        Reduce a unicode string to ASCII.

        The text is NFKD-normalised and then ASCII-encoded; characters that
        have no ASCII form are dropped.
        :param unicode_str: string in format u'__any_string__'
        :return: the ASCII-encoded text
        """
        decomposed = unicodedata.normalize('NFKD', unicode_str)
        return decomposed.encode('ascii', 'ignore')

    def visitProgram(self, ctx):
        """Analyse the whole program by visiting every child of the root."""
        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#simpleDeclaration.
    def visitSimpleDeclaration(self, ctx):
        """Delegate to the children of the declaration."""
        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#variableDeclaration.
    def visitVariableDeclaration(self, ctx):
        """Check a variable declaration and register the variable.

        Raises:
            Exception: if the name is already defined in the current scope,
                or if the declared type differs from the initialiser type.
        """
        #  array with all children of a current context
        children = ctx.children

        #  get the context of children
        identifier = self.unicode_to_str(ctx.Identifier().getText())
        # NOTE(review): the result of visitChildren is used as the initial
        # type; which child's result this is depends on the visitor's
        # aggregation -- confirm before refactoring.
        lang_type = self.visitChildren(ctx)
        expression = ctx.expression()

        #  more than four children: an explicit type is present at index 3
        if len(children) > 4:
            lang_type = self.visitLang_type(children[3])
        final_type = lang_type

        #  check if the variable was already defined in the current scope
        if self.current_symbol_table.is_defined_in_current_scope(identifier):
            raise Exception(
                'Variable {} is already defined'.format(identifier))

        #  deduce type from expression if no explicit type was specified
        if lang_type is None:  # 'var' Identifier 'is' expression
            final_type = self.visitExpression(expression)
        #  check if explicit type definition corresponds to the expression type
        elif lang_type is not None and expression is not None:  # 'var' Identifier ':' lang_type 'is' expression
            expression_type = self.visitExpression(expression)
            if lang_type != expression_type:
                raise Exception(
                    'Incompatible types in variable declaration {} '.format(
                        identifier))

        #  add variable to the symbol table
        self.current_symbol_table.add_variable(identifier, final_type)

    # Visit a parse tree produced by HelloParser#typeDeclaration.
    def visitTypeDeclaration(self, ctx):
        """Register a type alias: the declared name maps to the resolved type."""
        #  name of the alias and the type it stands for
        identifier = self.unicode_to_str(ctx.Identifier().getText())
        # The declaration context itself is passed to visitLang_type.
        current_type = self.visitLang_type(ctx)

        #  add alias for the type to the alias table
        AliasType.table[identifier] = current_type

    # Visit a parse tree produced by HelloParser#lang_type.
    def visitLang_type(self, ctx):
        """Resolve the type described by ``ctx``.

        Alias names are looked up in ``AliasType.table``; everything else is
        resolved by visiting the children.
        """
        children = ctx.children

        #  the child at index 3 names an existing alias
        if len(children) > 3:
            candidate = ctx.children[3]
            if hasattr(candidate,
                       'Identifier') and candidate.Identifier() is not None:
                alias_name = self.unicode_to_str(
                    candidate.Identifier().getText())
                return AliasType.table[alias_name]

        #  a single child whose text is a known alias
        if len(children) == 1:
            only_child = children[0]
            if self.unicode_to_str(only_child.getText()) in AliasType.table:
                return AliasType.table[only_child.getText()]

        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#primitiveType.
    def visitPrimitiveType(self, ctx):
        """Return the entry of ``PrimitiveType.types`` for the primitive type's text."""
        #  look the type text up in PrimitiveType.types
        p_text = self.unicode_to_str(ctx.children[0].getText())
        identifier = PrimitiveType.types[p_text]
        return identifier

    # Visit a parse tree produced by HelloParser#userType.
    def visitUserType(self, ctx):
        """Delegate to the children of the user type."""
        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#recordType.
    def visitRecordType(self, ctx):
        """Create a record type from the variable declarations inside ``ctx``.

        A temporary child scope named 'current_record' is entered while the
        fields are collected and removed again afterwards.

        Returns:
            The id of the newly created ``RecordType``.
        """
        #  temporary scope used only while the fields are collected
        self.current_symbol_table = self.current_symbol_table.create_child_scope(
            'current_record')

        #  map every declared field name to its type
        fields = {}
        for child in ctx.children:
            if type(child) != HelloParser.VariableDeclarationContext:
                continue
            field_name = self.unicode_to_str(child.children[1].getText())
            fields[field_name] = self.visitLang_type(child)

        record_type = RecordType(fields)

        #  leave the temporary scope and drop it from its parent
        self.current_symbol_table = self.current_symbol_table.parent_scope
        self.current_symbol_table.remove_child_scope('current_record')
        return record_type.get_id()

    # Visit a parse tree produced by HelloParser#arrayType.
    def visitArrayType(self, ctx):
        """Create an array type and check that its size expression is an integer.

        Returns:
            The id of the newly created ``ArrayType``.

        Raises:
            Exception: if the size expression is not of integer type.
        """
        #  get the type of array elements
        # NOTE(review): relies on visitChildren returning the element type --
        # confirm against the visitor's result aggregation.
        nested_type = self.visitChildren(ctx)

        #  create a new type of array (before the size check below)
        new_type = ArrayType(nested_type)

        #  the size expression is the child at index 2
        expression = ctx.children[2]
        if self.visitExpression(expression) != PrimitiveType.integer:
            raise Exception('Array size can only be integer')
        return new_type.get_id()

    # Visit a parse tree produced by HelloParser#statement.
    def visitStatement(self, ctx):
        """Delegate to the children of the statement."""
        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#assignment.
    def visitAssignment(self, ctx):
        """Check that the right-hand side may be assigned to the left-hand side.

        Returns:
            The result of visiting the children when the target is of array
            type and the element type matches; ``None`` otherwise.

        Raises:
            Exception: when the two types are not compatible.
        """
        target_ctx = ctx.modifiablePrimary()
        value_ctx = ctx.expression()
        lhs_type = self.visitModifiablePrimary(target_ctx)
        rhs_type = self.visitExpression(value_ctx)

        #  array target: the value must have the array's element type
        if TypeTable.get_type_name(lhs_type) == 'ArrayType':
            if TypeTable.table[lhs_type].nested_type_id == rhs_type:
                return self.visitChildren(ctx)
            raise Exception(
                'Cannot assign {} to array with elements of type {}'.
                format(rhs_type, lhs_type))

        #  any other target: defer to the general compatibility rules
        if TypeUtils.are_compatible_for_assignment(lhs_type, rhs_type):
            return None
        raise Exception(
            'Types {} and {} are not compatible for assignment'.format(
                TypeTable.get_type_name(lhs_type),
                TypeTable.get_type_name(rhs_type)))

    # Visit a parse tree produced by HelloParser#routineCall.
    def visitRoutineCall(self, ctx):
        """Check a routine call against the routine's declaration.

        Returns:
            The declared return type of the routine.

        Raises:
            Exception: if the routine is not defined, if the number of
                arguments differs from the number of parameters, or if an
                argument type is not compatible with its parameter type.
        """
        routine_name = self.unicode_to_str(ctx.Identifier().getText())

        #  the definedness check must come first: the routine info of an
        #  unknown routine cannot be dereferenced
        if not self.current_symbol_table.routine_defined_in_scope(
                routine_name):
            raise Exception('Routine {} is not defined'.format(routine_name))

        routine_info = self.current_symbol_table.get_routine_info(
            routine_name)
        return_type = routine_info.return_type
        #  visitRoutineDeclaration registers a routine without parameters
        #  with None instead of a list; treat that as "no parameters"
        routine_parameters = routine_info.parameters or []

        #  the arguments are the expression children of the call
        arguments = [
            child for child in ctx.children
            if type(child) == HelloParser.ExpressionContext
        ]

        #  check number of arguments compatibility
        if len(routine_parameters) != len(arguments):
            raise Exception(
                "Wrong number of arguments in routine call {}".format(
                    routine_name))

        #  check argument types and parameter types compatibility
        for parameter_type, argument in zip(routine_parameters, arguments):
            argument_type = self.visitExpression(argument)
            if not TypeUtils.are_compatible_for_assignment(
                    parameter_type, argument_type):
                raise Exception(
                    'Parameter of type {} and argument of type {} are not compatible in {} routine call'
                    .format(TypeTable.get_type_name(parameter_type),
                            TypeTable.get_type_name(argument_type),
                            routine_name))
        return return_type

    # Visit a parse tree produced by HelloParser#whileLoop.
    def visitWhileLoop(self, ctx):
        """Analyse a while loop inside a fresh child scope."""
        #  creating new scope for while loop
        self.current_symbol_table = self.current_symbol_table.create_child_scope(
            self.current_symbol_table.get_new_inner_scope_name())

        #  visiting while loop context children
        self.visitChildren(ctx)

        #  returning to higher scope
        self.current_symbol_table = self.current_symbol_table.parent_scope

    # Visit a parse tree produced by HelloParser#forLoop.
    def visitForLoop(self, ctx):
        """Analyse a for loop inside a fresh child scope holding the loop variable."""
        #  creating new scope for 'for' loop
        self.current_symbol_table = self.current_symbol_table.create_child_scope(
            self.current_symbol_table.get_new_inner_scope_name())

        #  the iteration variable is registered as an integer in that scope
        identifier = self.unicode_to_str(ctx.Identifier().getText())
        self.current_symbol_table.add_variable(identifier,
                                               PrimitiveType.integer)

        #  visiting for loop context children
        self.visitChildren(ctx)

        #  returning to higher scope
        self.current_symbol_table = self.current_symbol_table.parent_scope

    # Visit a parse tree produced by HelloParser#lang_range.
    def visitLang_range(self, ctx):
        """Check that both boundaries of a range are integer expressions.

        Raises:
            Exception: if either boundary is not of integer type.
        """
        parts = ctx.children
        #  the boundaries are the children at index 0 and 2
        lower_ctx, upper_ctx = parts[0], parts[2]
        lower_type = self.visitExpression(lower_ctx)
        upper_type = self.visitExpression(upper_ctx)

        both_integer = (lower_type == PrimitiveType.integer
                        and upper_type == PrimitiveType.integer)
        if not both_integer:
            raise Exception('Range boundaries are not integer numbers')

    # Visit a parse tree produced by HelloParser#ifStatement.
    def visitIfStatement(self, ctx):
        """Analyse an if statement; each branch gets its own child scope.

        Raises:
            Exception: if the condition is not of boolean type.
        """
        parts = ctx.children
        condition = parts[1]

        #  scope of the 'then' branch (the condition is checked inside it)
        outer = self.current_symbol_table
        self.current_symbol_table = outer.create_child_scope(
            outer.get_new_inner_scope_name())

        condition_type = self.visitExpression(condition)
        if condition_type != PrimitiveType.boolean:
            raise Exception("Condition of if statement is not boolean")

        self.visitBody(parts[3])
        self.current_symbol_table = self.current_symbol_table.parent_scope

        #  no else branch: done
        if len(parts) <= 5:
            return

        #  scope of the 'else' branch
        outer = self.current_symbol_table
        self.current_symbol_table = outer.create_child_scope(
            outer.get_new_inner_scope_name())
        self.visitBody(parts[5])
        self.current_symbol_table = self.current_symbol_table.parent_scope

    # Visit a parse tree produced by HelloParser#routineDeclaration.
    def visitRoutineDeclaration(self, ctx):
        """Check a routine declaration and register it in the enclosing scope.

        The routine body is analysed in a child scope named after the
        routine; the routine itself is added to the parent of that scope.

        Raises:
            Exception: if the routine already exists, if a return type is
                declared without a return expression (or the reverse), or
                if the returned expression has a different type.
        """
        identifier = self.unicode_to_str(ctx.Identifier().getText())
        parameters_ctx = ctx.parameters()
        return_type_ctx = ctx.lang_type()
        return_expression = ctx.expression()
        body = ctx.body()

        if self.current_symbol_table.routine_defined_in_scope(identifier):
            raise Exception('Routine {} is already defined'.format(identifier))

        #  scope of the routine itself
        self.current_symbol_table = self.current_symbol_table.create_child_scope(
            identifier)

        #  collect the parameter types; the declarations sit at the odd
        #  positions of the parameters' children
        if parameters_ctx is None:
            parameters_list = None
        else:
            parameters_list = []
            for declaration in ctx.parameters().children[1::2]:
                _, parameter_type = self.visitParameterDeclaration(
                    declaration)
                parameters_list.append(parameter_type)

        #  a return type and a return expression must come together
        returns_value = return_expression is not None
        if return_type_ctx is None:
            return_type = None
            if returns_value:
                raise Exception("Routine has no return type")
        else:
            return_type = self.visitLang_type(return_type_ctx)
            if not returns_value:
                raise Exception("Routine must have a return statement")

        #  the routine belongs to the enclosing scope, not to its own
        self.current_symbol_table.parent_scope.add_routine(
            identifier, parameters_list, return_type)

        if body is not None:
            self.visitBody(body)

        #  the returned expression must have the declared type
        if returns_value:
            expr_type = self.visitExpression(return_expression)
            if return_type != expr_type:
                raise Exception("Return type must be {}".format(
                    TypeTable.get_type_name(return_type)))

        #  back to the enclosing scope
        self.current_symbol_table = self.current_symbol_table.parent_scope

    # Visit a parse tree produced by HelloParser#parameters.
    def visitParameters(self, ctx):
        """Delegate to the children of the parameter list."""
        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#parameterDeclaration.
    def visitParameterDeclaration(self, ctx):
        """Register one routine parameter in the current scope.

        Returns:
            A ``(name, type)`` tuple for the parameter.

        Raises:
            Exception: if the name is already defined in the current scope.
        """
        name = self.unicode_to_str(ctx.children[0].getText())
        parameter_type = self.visitLang_type(ctx)

        already_defined = self.current_symbol_table.is_defined_in_current_scope(
            name)
        if already_defined:
            raise Exception(
                'Parameter with name {} is already defined'.format(name))

        self.current_symbol_table.add_variable(name, parameter_type)
        return name, parameter_type

    # Visit a parse tree produced by HelloParser#body.
    def visitBody(self, ctx):
        """Delegate to the children of the body."""
        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#expression.
    def visitExpression(self, ctx):
        """Return the type of an expression.

        A single relation yields that relation's type; two relations joined
        by an operator must both be boolean and yield boolean.

        Raises:
            Exception: if a single relation has no type (``None``), or if an
                operand of a two-relation expression is not boolean.
        """
        operands = ctx.children

        #  single relation: its type is the expression type
        if len(operands) <= 1:
            single_type = self.visitRelation(operands[0])
            if single_type is None:
                raise Exception(
                    "Attempt to call a routine, which doesn't return anything")
            return single_type

        #  two relations: operands sit at index 0 and 2
        left_type = self.visitRelation(operands[0])
        right_type = self.visitRelation(operands[2])

        both_boolean = (left_type == PrimitiveType.boolean
                        and right_type == PrimitiveType.boolean)
        if not both_boolean:
            raise Exception(
                'Incompatible types {} and {} in expression, can be applied to boolean only'
                .format(TypeTable.get_type_name(left_type),
                        TypeTable.get_type_name(right_type)))

        return PrimitiveType.boolean

    # Visit a parse tree produced by HelloParser#relation.
    def visitRelation(self, ctx):
        """Return the type of a relation.

        A single operand yields its own type; a comparison of two operands
        is validated with ``TypeUtils.deduce_type_comparable`` and yields
        boolean.
        """
        operands = ctx.children

        if len(operands) > 1:
            #  comparison: operands sit at index 0 and 2
            left_type = self.visitSimple(operands[0])
            right_type = self.visitSimple(operands[2])
            TypeUtils.deduce_type_comparable(left_type, right_type)
            return PrimitiveType.boolean

        return self.visitSimple(operands[0])

    # Visit a parse tree produced by HelloParser#simple.
    def visitSimple(self, ctx):
        """Return the type of a '*', '/' or '%' term.

        A single operand yields its own type.  With an operator, the result
        type is deduced from both operand types by the matching
        ``TypeUtils`` helper; an unrecognised operator leaves the type of
        the left operand unchanged.

        The left operand is visited exactly once: visiting it again for the
        deduction repeated the whole sub-tree analysis at every nesting
        level.
        """
        children = ctx.children
        left_type = self.visitFactor(children[0])

        #  single operand: nothing to combine
        if len(children) <= 1:
            return left_type

        operator_text = self.unicode_to_str(children[1].getText())
        if operator_text == '*':
            deduce = TypeUtils.deduce_type
        elif operator_text == '/':
            deduce = TypeUtils.deduce_type_division
        elif operator_text == '%':
            deduce = TypeUtils.deduce_type_module
        else:
            #  unknown operator: the right operand is not inspected
            return left_type

        return deduce(left_type, self.visitFactor(children[2]))

    # Visit a parse tree produced by HelloParser#factor.
    def visitFactor(self, ctx):
        """Return the type of a factor.

        A single summand yields its own type; two summands joined by an
        operator yield the type deduced by ``TypeUtils.deduce_type``.

        The left summand is visited exactly once (it used to be visited a
        second time for the deduction).
        """
        children = ctx.children
        left_type = self.visitSummand(children[0])

        #  single summand: nothing to combine
        if len(children) <= 1:
            return left_type

        #  operands sit at index 0 and 2
        return TypeUtils.deduce_type(left_type,
                                     self.visitSummand(children[2]))

    # Visit a parse tree produced by HelloParser#summand.
    def visitSummand(self, ctx):
        """Return the type of a summand.

        A three-child summand is treated as a parenthesised expression and
        yields the type of its middle child; anything else is resolved by
        visiting the children.

        The inner expression is visited once only: previously the children
        were visited first and the expression then a second time.
        """
        children = ctx.children

        #  '(' expression ')'
        if len(children) == 3:
            return self.visitExpression(children[1])

        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#primary.
    def visitPrimary(self, ctx):
        """Return the type of a primary.

        Routine calls yield the routine's return type, integer/real
        literals and the texts 'true'/'false' yield the corresponding
        primitive type, and everything else is resolved by visiting the
        children.

        The children are only visited in that last case, so a routine call
        is checked once instead of twice.
        """
        routine_call = ctx.routineCall()
        if routine_call is not None:  # if primary is routine call
            return self.visitRoutineCall(routine_call)
        if ctx.IntegerLiteral() is not None:  # if primary is integer
            return PrimitiveType.integer
        if ctx.RealLiteral() is not None:  # if primary is real
            return PrimitiveType.real

        literal_text = self.unicode_to_str(ctx.children[0].getText())
        if literal_text == 'true' or literal_text == 'false':  # if primary is boolean
            return PrimitiveType.boolean

        #  if primary is modifiable primary
        return self.visitChildren(ctx)

    # Visit a parse tree produced by HelloParser#modifiablePrimary.
    def visitModifiablePrimary(self, ctx):
        """Return the type of a variable, array element or record field access.

        Raises:
            Exception: if the variable, array or record is not defined in
                scope, or if a record does not have the accessed field.
        """
        children = ctx.children

        #  plain variable name
        if len(children) == 1:
            name = self.unicode_to_str(children[0].getText())
            if self.current_symbol_table.is_defined_in_scope(name):
                return self.current_symbol_table.get_variable_info(
                    name).variable_type
            raise Exception('Variable {} is not defined'.format(name))

        #  array element: the child at index 2 is the index expression
        if type(children[2]) is HelloParser.ExpressionContext:
            array_name = self.unicode_to_str(children[0].getText())
            if not self.current_symbol_table.is_defined_in_scope(array_name):
                raise Exception('Array with name {} is not defined'.format(
                    array_name))
            array_type_id = self.current_symbol_table.get_variable_info(
                array_name).variable_type
            return TypeTable.get_type(array_type_id).nested_type_id

        #  record field access: the identifiers sit at the even positions
        record_calls = []
        for position, child in enumerate(children[::2]):
            identifier = self.unicode_to_str(child.getText())
            #  only the first identifier has to be a declared variable
            if position == 0 and not self.current_symbol_table.is_defined_in_scope(
                    identifier):
                raise Exception(
                    'Record with name {} is not defined'.format(identifier))
            record_calls.append(identifier)

        #  follow the chain of fields, one record type at a time
        type_id = self.current_symbol_table.get_variable_info(
            record_calls[0]).variable_type
        current_type = self.type_table.table[type_id]
        for owner, field in zip(record_calls, record_calls[1:]):
            if field not in current_type.inner_declarations.keys():
                raise Exception("Record {} doesn't have a field {}".format(
                    owner, field))
            type_id = current_type.inner_declarations[field]
            current_type = self.type_table.table[type_id]
        return type_id

    # Visit a parse tree produced by HelloParser#eos.
    def visitEos(self, ctx):
        """Delegate to the children of the end-of-statement node."""
        return self.visitChildren(ctx)
Example #51
0
 def _initial(self):
     """Create a fresh symbol table and reset the next address to 16."""
     self.symbols = SymbolTable()
     # NOTE(review): 16 is presumably the first address available for new
     # variables -- confirm against the code that uses next_addr.
     self.next_addr = 16
 def __init__(self):
     """Start with a root symbol table (no parent) and an empty current type."""
     self.table = SymbolTable(None, "root")
     self.actType = ""
Example #53
0
class F100Asm():
    """Line-oriented two-pass assembler.

    The instance keeps a symbol table (``self.st``), a program counter
    (``self.pc``) and one handler object per supported opcode family.
    Each handler exposes ``opcode_regexp`` and ``assemble``.
    """

    def __init__(self):
        self.st = SymbolTable()
        self.pc = 0
        self.opcodes = [
            o()
            for o in (OpcodeF0_Jump, OpcodeF0_Shift, OpcodeF0_Halt,
                      OpcodeF0_Bit, OpcodeF1, OpcodeF2, OpcodeF3, OpcodeF4,
                      OpcodeF5, OpcodeF6, OpcodeF7, OpcodeF8, OpcodeF9,
                      OpcodeF10, OpcodeF11, OpcodeF12, OpcodeF13, OpcodeF15)
        ]

    def is_valid_opcode(self, opcode_str):
        """Return a new handler instance for ``opcode_str``, or ``None``.

        The registered handlers are tried in order; the first one whose
        ``opcode_regexp`` matches wins and a fresh instance of its class is
        returned.
        """
        for o in self.opcodes:
            if o.opcode_regexp.match(opcode_str):
                return o.__class__()
        return None

    def line_assemble(self,
                      opcode,
                      operands,
                      symbol_table,
                      suppress_errors=False):
        """Assemble a single opcode line.

        :param opcode: upper-cased opcode mnemonic.
        :param operands: list of operand fields following the mnemonic.
        :param symbol_table: symbol table handed to the opcode handler.
        :param suppress_errors: forwarded to the handler's ``assemble``.
        :return: ``(assembled_words, warnings)`` as produced by the handler.
        :raises UserWarning: if no handler recognises ``opcode``.
        """
        op = self.is_valid_opcode(opcode)
        if op is None:
            raise UserWarning("Unrecognized opcode %s" % opcode)
        (assembled_words, warnings) = op.assemble(opcode, operands,
                                                  symbol_table,
                                                  suppress_errors)
        return (assembled_words, warnings)

    def twopass_assemble(self, text, listingon=True):
        """Run ``assemble`` twice over ``text`` and return the second result.

        Both passes start from the program counter the assembler had on
        entry. Without this reset the second pass would begin where the
        first one finished, so a source that does not open with ``.ORG``
        would get label addresses that disagree between the passes.

        :param text: iterable of source lines.
        :param listingon: forwarded to ``assemble``.
        :return: the dictionary returned by the final pass.
        """
        start_pc = self.pc
        assembled_words = dict()
        for i in range(0, 2):
            self.pc = start_pc
            assembled_words = self.assemble(text, i, listingon)
        return assembled_words

    def assemble(self, text, pass_number, listingon=True):
        """Assemble every line of ``text`` for one pass.

        On pass 0 errors are tolerated silently so that forward references
        can be collected; on later passes diagnostics, a listing (when
        ``listingon``) and a symbol table dump are printed.

        :param text: iterable of source lines.
        :param pass_number: 0 for the first pass, greater than 0 afterwards.
        :param listingon: print a per-line listing on passes after the first.
        :return: dict mapping each line's starting pc to its list of words
            (empty on pass 0).
        :raises UserWarning: on a pass after the first if any error was
            counted.
        :raises ValueError: re-raised from a directive on a pass after the
            first.
        """

        error_count = 0
        warning_count = 0
        lineno = 1
        label_list = dict()

        assembled_words = dict()

        if pass_number > 0:
            print(header_text)

        for textline in text:

            warnings = []
            line_pc = self.pc
            line_words = []

            line = textline.strip()
            ## Strip out comments from the end of the line
            comment_start = line.find(";")
            if comment_start > -1:
                line = line[:comment_start].strip()
            ## Strip out label from the start of the line
            label_match = re.match("([a-zA-Z_][a-zA-Z0-9_]*):", line)
            if label_match:
                line_label = str.upper(label_match.group(1))
                if line_label in label_list:
                    error_count += 1
                    print(
                        "Error, label %s on line %d has already been defined" %
                        (line_label, lineno))
                else:
                    label_list[line_label] = True
                # The label is (re)bound to the current pc even when it is a
                # duplicate, exactly as the error above is only a report.
                self.st[line_label] = str(self.pc)
                # +1 skips the ':' that terminates the label.
                line = line[len(line_label) + 1:].strip()
            ## Left now with a line which is either blank, directive or opcode
            if line != "":
                fields = [str.upper(f) for f in line.split()]
                t = fields[0]
                if directive_re.match(t):
                    try:
                        (self.pc,
                         line_words) = self.process_directive(t, fields[1:])
                    except ValueError:
                        # Unresolved expressions are expected on pass 0.
                        if pass_number >= 1:
                            error_count += 1
                            raise
                    except SyntaxError as s:
                        error_count += 1
                        print("Syntax Error on line %d: %s" % (lineno, s))
                else:
                    try:
                        (line_words, warnings) = self.line_assemble(
                            t,
                            fields[1:],
                            self.st,
                            suppress_errors=pass_number < 1)
                    except ValueError as v:
                        if pass_number > 0:
                            # Ignore undefined symbols on first pass
                            error_count += 1
                            print(v)
                    except (TypeError, UserWarning, SyntaxError) as e:
                        error_count += 1
                        if pass_number > 0:
                            print("Error on line %d" % lineno)
                            print(e)
                    if pass_number > 0:
                        for w in warnings:
                            print(w)
                    warning_count += len(warnings)
                    self.pc += len(line_words)

            if pass_number > 0 and listingon:
                ## Simple listing code
                fields = []
                fields.append(" %5d:" % lineno)
                if len(line_words) > 0:
                    if len(line_words) > 1:
                        fields.append("     %04X: %04X %04X " %
                                      (line_pc, line_words[0] & 0xFFFF,
                                       line_words[1] & 0xFFFF))
                    else:
                        fields.append("     %04X: %04X      " %
                                      (line_pc, line_words[0] & 0xFFFF))
                else:
                    fields.append(' ' * 21)
                fields.append(textline.strip())
                print(' '.join(fields))
                # Words beyond the first two go on continuation lines.
                for d in line_words[2:]:
                    print("                   %04X " % (d & 0xFFFF))

            lineno += 1
            if pass_number > 0:
                assembled_words[line_pc] = line_words

        if pass_number > 0:
            print(line_sep)
            print("# %d Error%s" %
                  (error_count, '' if error_count == 1 else 's'))
            print("# %d Warning%s" %
                  (warning_count, '' if warning_count == 1 else 's'))
            print(line_sep)
            print("# SymbolTable")
            for s in self.st.tostring().split('\n'):
                print("# %s" % s)

            if error_count > 0:
                raise UserWarning("Assembly finished with errors")
        return assembled_words

    def process_directive(self, directive, operands):
        """Apply one assembler directive.

        ``.EQU`` stores the joined remaining operands under the first
        operand in the symbol table, ``.ORG`` evaluates its operands to a
        new pc, and ``.DATA`` / ``.WORD`` emit one word per comma-separated
        expression. Any other directive leaves pc and words untouched.

        :param directive: upper-cased directive token.
        :param operands: list of whitespace-split operand fields.
        :return: ``(new_pc, words)``.
        :raises ValueError: propagated from ``eval_expr`` for ``.ORG``.
        """
        new_pc = self.pc
        words = []

        if directive == ".EQU":
            self.st[operands[0]] = ''.join(operands[1:])
        elif directive == ".ORG":
            new_pc = self.st.eval_expr(''.join(operands[0:]))
        elif directive == ".DATA" or directive == ".WORD":
            ## Need to resplit by commas rather than spaces
            data_words = (''.join(operands).split(','))
            new_pc += len(data_words)

            for d in data_words:
                ## Cant eval all expressions on first pass so return dummy data
                try:
                    words.append(self.st.eval_expr(d))
                except ValueError:
                    words.append(0xFF)

        return (new_pc, words)
class TypeChecker(NodeVisitor):
    """AST visitor that reports type and scoping errors by printing them.

    State kept while walking:

    * ``table``   -- the symbol table of the scope currently being visited.
    * ``actType`` -- declared type of the declaration being visited, or "".
    * ``actFunc`` -- symbol of the function being visited, or ``None``.

    Every ``print`` call passes a single pre-formatted string, so the output
    is the same whether ``print`` is the Python 2 statement or the Python 3
    function.
    """

    def __init__(self):
        self.table = SymbolTable(None, "root")
        self.actType = ""
        # Must exist before any function is visited: visit_ReturnInstruction
        # tests it to detect a return placed outside of a function.
        self.actFunc = None

    def visit_NoneType(self, node):
        return 'None'

    def visit_Integer(self, node):
        return 'int'

    def visit_Float(self, node):
        return 'float'

    def visit_String(self, node):
        return 'string'

    def visit_Variable(self, node):
        """Return the declared type of a variable, or None if undefined."""
        definition = self.table.getGlobal(node.name)
        if definition is None:
            print("Undefined symbol {} in line {}".format(
                node.name, node.line))
            return None
        return definition.type

    def visit_BinExpr(self, node):
        """Return the result type of a binary expression (None if invalid)."""
        left = self.visit(node.left)
        right = self.visit(node.right)
        result = ttype(node.op, left, right)
        if result is None:
            print("Bad expression {} in line {}".format(node.op, node.line))
        return result

    def visit_AssignmentInstruction(self, node):
        """Check that the assigned expression fits the target's type.

        An ``int`` value may be stored in a ``float`` variable; every other
        type mismatch is reported.
        """
        definition = self.table.getGlobal(node.id)
        exprType = self.visit(node.expr)
        if definition is None:
            print("Used undefined symbol {} in line {}".format(
                node.id, node.line))
        elif exprType != definition.type and not (
                definition.type == "float" and exprType == "int"):
            print("Bad assignment of {} to {} in line {}.".format(
                exprType, definition.type, node.line))

    def visit_GroupedExpression(self, node):
        return self.visit(node.interior)

    def visit_FunctionExpression(self, node):
        """Register a function and check its arguments and body in a new scope."""
        if self.table.get(node.name):
            print("Function {} already defined. Line: {}".format(
                node.name, node.line))
        else:
            function = FunctionSymbol(node.name, node.retType,
                                      SymbolTable(self.table, node.name))
            self.table.put(node.name, function)
            self.actFunc = function
            # Enter the function's own scope for its arguments and body.
            self.table = self.actFunc.table
            if node.args is not None:
                self.visit(node.args)
            self.visit(node.body)
            self.table = self.table.getParentScope()
            self.actFunc = None

    def visit_CompoundInstruction(self, node):
        """Visit a block inside a fresh nested scope."""
        innerScope = SymbolTable(self.table, "innerScope")
        self.table = innerScope
        if node.declarations is not None:
            self.visit(node.declarations)
        self.visit(node.instructions)
        self.table = self.table.getParentScope()

    def visit_ArgumentList(self, node):
        for arg in node.children:
            self.visit(arg)
        self.actFunc.extractParams()

    def visit_Argument(self, node):
        if self.table.get(node.name) is not None:
            print("Argument {} already defined. Line: {}".format(
                node.name, node.line))
        else:
            self.table.put(node.name, VariableSymbol(node.name, node.type))

    def visit_InvocationExpression(self, node):
        """Check a call against the callee's parameters; return its type.

        Returns None (after reporting) when the callee is not a known
        function. A call with no argument list is treated as a call with
        zero arguments.
        """
        funDef = self.table.getGlobal(node.name)
        if funDef is None or not isinstance(funDef, FunctionSymbol):
            print("Function {} not defined. Line: {}".format(
                node.name, node.line))
            return None

        actualArgs = [] if node.args is None else node.args.children
        types = [self.visit(x) for x in actualArgs]
        expectedTypes = funDef.params
        if len(types) != len(expectedTypes):
            print("Invalid number of arguments in line {}. Expected {}".format(
                node.line, len(expectedTypes)))
        # Pairs beyond the shorter list are skipped by zip; the count
        # mismatch itself has already been reported above.
        for actual, expected in zip(types, expectedTypes):
            if actual != expected and not (actual == "int"
                                           and expected == "float"):
                print("Mismatching argument types in line {}. "
                      "Expected {}, got {}".format(
                          node.line, expected, actual))
        return funDef.type

    def visit_ChoiceInstruction(self, node):
        self.visit(node.condition)
        self.visit(node.action)
        if node.alternateAction is not None:
            self.visit(node.alternateAction)

    def visit_WhileInstruction(self, node):
        self.visit(node.condition)
        self.visit(node.instruction)

    def visit_RepeatInstruction(self, node):
        self.visit(node.condition)
        self.visit(node.instructions)

    def visit_ReturnInstruction(self, node):
        """Check a return statement against the enclosing function's type."""
        if self.actFunc is None:
            print("Return placed outside of a function in line {}".format(
                node.line))
        else:
            returned = self.visit(node.expression)
            # Returning an int from a float function is accepted.
            if returned != self.actFunc.type and (
                    self.actFunc.type != "float" or returned != "int"):
                print("Invalid return type of {} in line {}. Expected {}".format(
                    returned, node.line, self.actFunc.type))

    def visit_Init(self, node):
        """Check one initialiser of a declaration and define the variable."""
        initType = self.visit(node.expr)
        # int and float are interchangeable in an initialiser.
        compatible = (initType == self.actType
                      or (initType == "int" and self.actType == "float")
                      or (initType == "float" and self.actType == "int"))
        if compatible:
            if self.table.get(node.name) is not None:
                print("Invalid definition of {} in line: {}. "
                      "Entity redefined".format(node.name, node.line))
            else:
                self.table.put(node.name,
                               VariableSymbol(node.name, self.actType))
        else:
            print("Bad assignment of {} to {} in line {}".format(
                initType, self.actType, node.line))

    def visit_Declaration(self, node):
        self.actType = node.type
        self.visit(node.inits)
        self.actType = ""

    def visit_PrintInstruction(self, node):
        self.visit(node.expr)

    def visit_LabeledInstruction(self, node):
        self.visit(node.instr)

    def visit_Program(self, node):
        print("Visiting program")
        self.visit(node.declarations)
        self.visit(node.fundefs)
        self.visit(node.instructions)
Example #55
0
 def visit_Instructions(self,node):
     """Visit every element of ``node.list`` inside a nested scope.

     A new 'instructions' symbol table whose parent is the current one is
     installed for the duration of the walk and then replaced by its
     parent scope again.
     """
     enclosing = self.symbolTable
     self.symbolTable = SymbolTable(enclosing, 'instructions')
     index = 0
     elements = node.list
     while index < len(elements):
         elements[index].accept(self)
         index += 1
     self.symbolTable = self.symbolTable.getParentScope()
Example #56
0
from SymbolTable import SymbolTable

if __name__ == "__main__":
    # Fill a fresh table with a few sample identifiers, in this order.
    _symbolTable = SymbolTable()
    for _identifier in ("N1", "N2", "AB", "BC", "N3", "A3"):
        _symbolTable.add(_identifier)

    # The string below is disabled lookup code, kept as an inert literal.
    '''
    result = _symbolTable.search("N1")
    if result is None:
        print("None")
    else:
        print(result)

    result = _symbolTable.search("N4")
    if result is None:
        print("None")
    else:
        print(result)

    result = _symbolTable.search("15")
    if result is None:
        print("None")
    else:
        print(result)
        '''
class CompilationEngine:
    """Recursive-descent compiler that turns a token list into VM code.

    Tokens are ``(type, value)`` pairs. The engine walks them with a cursor,
    records declared names in a class-level and a subroutine-level symbol
    table, and emits VM commands through a ``VMWriter``. Symbol table
    entries returned by ``get_data`` are indexed as ``(type, kind, index)``.
    """

    # Binary operators (as spelled in the token stream, i.e. XML-escaped)
    # mapped to the VM arithmetic command emitted for them. "*" and "/" are
    # listed so they are recognised as operators, but compile_operation
    # turns them into Math library calls instead.
    all_operators = {
        "+": "add",
        "-": "sub",
        "/": "div",
        "*": "mul",
        "&amp;": "and",
        "|": "or",
        "&gt;": "gt",
        "&lt;": "lt",
        "=": "eq"
    }

    def __init__(self, tokens, out_file):
        """
        initializing a new compile engine object and compiling the class
        :param tokens: the list of tokens created by the tokenizer
        :param out_file: the output file.
        """
        self.__tokens = tokens
        self.__file = out_file
        self.__i = 0
        self.__class_symbol = SymbolTable()
        self.__subroutine_symbol = SymbolTable()
        self.__cur_token = ()
        self.__class_name = ""
        self.__writer = VMWriter(out_file)
        self.__label_count = 0
        # Close the writer even when compilation fails part-way, so the
        # output is not left open on malformed input.
        try:
            self.compile_class()
        finally:
            self.__writer.close()

    def __new_label(self):
        """Return the next unused label name ("L0", "L1", ...)."""
        label = "L%s" % self.__label_count
        self.__label_count += 1
        return label

    def eat(self):
        """
        compiling a single token and move to the next one
        """
        self.__cur_token = self.__tokens[self.__i]
        self.__i += 1

    def get_token(self):
        """
        :return: the value of the token most recently consumed by eat()
        """
        return self.__cur_token[1]

    def peek(self):
        """
        checking the current token without compiling
        :return: the token
        """
        return self.__tokens[self.__i][1]

    def peek_type(self):
        """
        checking the current token type without compiling
        :return: the token type
        """
        return self.__tokens[self.__i][0]

    def peek_ll2(self):
        """
        checking two tokens ahead without compiling
        :return: the token
        """
        return self.__tokens[self.__i + 1][1]

    def compile_while_stat(self):  # i points to while
        """
        compiling while statement
        """
        self.eat()  # while
        self.eat()  # (
        label_true = self.__new_label()
        label_continue = self.__new_label()
        self.__writer.write_label(label_true)
        self.compile_expression()
        # Leave the loop when the condition is false.
        self.__writer.write_arithmetic("not")
        self.__writer.write_if(label_continue)
        self.eat()  # )
        self.eat()  # {
        self.compile_statements()
        self.__writer.write_go_to(label_true)
        self.eat()  # }
        self.__writer.write_label(label_continue)

    def compile_return_stat(self):  # i points to return
        """
        compiling return statement
        """
        self.eat()  # return
        if self.peek() != ";":
            self.compile_expression()
        else:
            # A bare "return;" still pushes a value: constant 0.
            self.__writer.write_push("constant", 0)
        self.__writer.write_return()
        self.eat()  # ;

    def compile_do_stat(self):
        """
        compiling do statement
        """
        self.eat()  # do
        self.compile_subroutine_call()
        # The call's return value is unused: pop it into temp 0.
        self.__writer.write_pop("temp", 0)
        self.eat()  # ;

    def compile_if_stat(self):
        """
        compiling if statement
        """
        self.eat()  # if
        self.eat()  # (
        self.compile_expression()
        self.__writer.write_arithmetic("not")
        label_false = self.__new_label()
        label_continue = self.__new_label()
        self.__writer.write_if(label_false)
        self.eat()  # )
        self.eat()  # {
        self.compile_statements()
        self.__writer.write_go_to(label_continue)
        self.eat()  # }
        self.__writer.write_label(label_false)
        if self.peek() == "else":
            self.eat()  # else
            self.eat()  # {
            self.compile_statements()
            self.eat()  # }
        self.__writer.write_label(label_continue)

    def compile_class_var_dec(self):
        """
        compiling class variable declaration
        """
        self.eat()
        kind = self.get_token()
        if kind == "var":
            kind = SymbolTable.VAR
        self.var_dec_helper(kind, self.__class_symbol)

    def compile_var_dec(self):
        """
        compiling variable declaration
        """
        self.eat()
        self.var_dec_helper(SymbolTable.VAR, self.__subroutine_symbol)

    def var_dec_helper(self, kind, symbol_table):
        """
        consuming "type name (, name)* ;" and recording every name
        :param kind: the kind under which each name is added
        :param symbol_table: the table receiving the names
        """
        self.eat()
        var_type = self.get_token()
        self.eat()
        symbol_table.add(self.get_token(), var_type, kind)
        while self.peek() != ";":
            self.eat()  # ,
            self.eat()  # next name
            symbol_table.add(self.get_token(), var_type, kind)
        self.eat()  # ;

    def compile_subroutine_body(self, func_name, func_type):
        """
        compiling subroutine body
        :param func_name: the name written in the VM function command
        :param func_type: the subroutine keyword; "method" and
            "constructor" get an extra prologue
        """
        self.eat()  # {
        while self.peek() == "var":
            self.compile_var_dec()
        # The function command needs the local count, so it is written only
        # after all var declarations have been recorded.
        self.__writer.write_function(
            func_name, self.__subroutine_symbol.var_count(SymbolTable.VAR))
        self.__subroutine_symbol.add("this", self.__class_name, "pointer")
        if func_type == "method":
            # Point pointer 0 at the object passed as argument 0.
            self.__writer.write_push(SymbolTable.ARG, 0)
            self.__writer.write_pop("pointer", 0)

        elif func_type == "constructor":
            # Allocate one word per field and point pointer 0 at the block.
            self.__writer.write_push(
                "constant", self.__class_symbol.var_count(SymbolTable.FIELD))
            self.__writer.write_call("Memory.alloc", 1)
            self.__writer.write_pop("pointer", 0)
        self.compile_statements()
        self.eat()  # }

    def compile_parameter_list(self):
        """
        compiling parameters list
        """
        cur_stat = self.peek()
        if cur_stat != ")":
            self.eat()
            param_type = self.get_token()
            self.eat()
            self.__subroutine_symbol.add(self.get_token(), param_type,
                                         SymbolTable.ARG)
            cur_stat = self.peek()

        while cur_stat == ",":
            self.eat()  # ,
            self.eat()
            param_type = self.get_token()
            self.eat()
            self.__subroutine_symbol.add(self.get_token(), param_type,
                                         SymbolTable.ARG)
            cur_stat = self.peek()

    def compile_class(self):
        """
        compiling class
        """
        self.eat()  # class
        self.eat()  # class name
        self.__class_name = self.get_token()
        self.eat()  # {

        while self.peek() in ("static", "field"):
            self.compile_class_var_dec()

        while self.peek() != "}":
            self.compile_subroutine_dec()
        self.eat()  # }

    def compile_expression(self):
        """
        compiling expression
        """
        self.compile_term()
        cur_stat = self.peek()
        while cur_stat in CompilationEngine.all_operators:
            self.eat()
            self.compile_term()
            # The operator is emitted after both of its operands.
            self.compile_operation(cur_stat)
            cur_stat = self.peek()

    def compile_operation(self, op):
        """
        compiling operation
        :param op: current op
        """
        if op == "*":
            self.__writer.write_call("Math.multiply", 2)

        elif op == "/":
            self.__writer.write_call("Math.divide", 2)

        else:
            self.__writer.write_arithmetic(CompilationEngine.all_operators[op])

    def compile_statements(self):
        """
        compiling statements
        """
        while self.compile_statement():
            continue

    def compile_subroutine_call(self):
        """
        compiling subroutine call
        """
        self.eat()
        name = self.get_token()
        if self.peek() == "(":
            # Unqualified call: pointer 0 is passed as the extra first
            # argument.
            self.eat()  # (
            self.__writer.write_push("pointer", 0)
            args = self.compile_expression_list()
            self.eat()  # )
            self.__writer.write_call(self.__class_name + "." + name, args + 1)
        else:
            self.eat()  # .
            val = self.find(name)
            self.eat()  # subroutine name
            var_name = self.get_token()
            self.eat()  # (
            if not val:
                # Not a known variable: the prefix is used as-is in the call.
                args = 0
            else:
                # Known variable: push it as the first argument and call
                # through its declared type.
                self.__writer.push_val(val)
                name = val[0]
                args = 1

            args += self.compile_expression_list()
            self.__writer.write_call(name + "." + var_name, args)
            self.eat()  # )

    def compile_expression_list(self):
        """
        compiling expression list
        :return: the number of expressions compiled
        """
        args = 0
        cur_stat = self.peek()
        if cur_stat != ")":
            self.compile_expression()
            args += 1
            cur_stat = self.peek()

        while cur_stat == ",":
            self.eat()
            args += 1
            self.compile_expression()
            cur_stat = self.peek()

        return args

    def compile_statement(self):
        """
        compiling statement
        :return: True if a statement was compiled, False when the next
            token does not start a statement
        """
        cur_stat = self.peek()
        if cur_stat == "if":
            self.compile_if_stat()
        elif cur_stat == "while":
            self.compile_while_stat()
        elif cur_stat == "do":
            self.compile_do_stat()
        elif cur_stat == "return":
            self.compile_return_stat()
        elif cur_stat == "let":
            self.compile_let_stat()
        else:
            return False  # when there is no more statements to compile
        return True

    def compile_let_stat(self):
        """
        compiling let statement
        """
        self.eat()  # let
        self.eat()  # variable name
        name = self.get_token()
        data = self.find(name)
        kind = data[1]
        ind = data[2]

        # Fields are written through the "this" segment.
        if kind == "field":
            kind = "this"

        if self.peek() == "[":
            self.compile_array(kind, ind)
        else:
            self.eat()  # =
            self.compile_expression()
            self.__writer.write_pop(kind, ind)
        self.eat()  # eat ;

    def compile_subroutine_dec(self):
        """
        compiling subroutine declaration
        """
        self.eat()
        func_type = self.get_token()
        self.eat()  # return type
        self.eat()  # subroutine name
        func_name = self.__class_name + "." + self.get_token()
        self.eat()  # (
        if func_type == "method":
            # Registered first so it is added ahead of the declared
            # parameters.
            self.__subroutine_symbol.add("this", self.__class_name,
                                         SymbolTable.ARG)
        self.compile_parameter_list()
        self.eat()  # )
        self.compile_subroutine_body(func_name, func_type)
        # Each subroutine starts with an empty subroutine-level table.
        self.__subroutine_symbol = SymbolTable()

    def compile_term(self):
        """
        compiling term
        """
        token_type = self.peek_type()
        if token_type == JackTokenizer.INT_CONST:
            self.__writer.write_push("constant", self.peek())
            self.eat()
            return

        if token_type == JackTokenizer.KEYWORD:
            keyword = self.peek()
            if keyword == "null" or keyword == "false":
                self.__writer.write_push("constant", 0)

            elif keyword == "true":
                # true is emitted as "not 0".
                self.__writer.write_push("constant", 0)
                self.__writer.write_arithmetic("not")

            elif keyword == "this":
                self.__writer.write_push("pointer", 0)

            self.eat()
            return

        if token_type == JackTokenizer.STR_CONST:
            # Control characters are spelled out as two-character escapes
            # before the string is emitted character by character.
            string = self.peek()
            for raw, escaped in (('\t', "\\t"), ('\n', "\\n"),
                                 ('\r', "\\r"), ('\b', "\\b")):
                string = string.replace(raw, escaped)
            self.__writer.write_push("constant", len(string))
            self.__writer.write_call("String.new", 1)
            for ch in string:
                self.__writer.write_push("constant", ord(ch))
                self.__writer.write_call("String.appendChar", 2)
            self.eat()
            return

        cur_stat = self.peek()
        if cur_stat == "(":
            self.eat()  # (
            self.compile_expression()
            self.eat()  # )
            return

        if cur_stat == "-":
            self.eat()
            self.compile_term()
            self.__writer.write_arithmetic("neg")
            return

        if cur_stat == "~":
            self.eat()
            self.compile_term()
            self.__writer.write_arithmetic("not")
            return

        # What remains starts with a name; the token after it tells array
        # access, subroutine call and plain variable apart.
        cur_stat = self.peek_ll2()
        if cur_stat == "[":
            self.eat()  # array name
            name = self.get_token()
            self.__writer.push_val(self.find(name))
            self.eat()  # [
            self.compile_expression()
            self.__writer.write_arithmetic("add")
            self.__writer.write_pop("pointer", 1)
            self.__writer.write_push("that", 0)
            self.eat()  # ]
            return

        if cur_stat == "." or cur_stat == "(":
            self.compile_subroutine_call()
            return

        self.eat()  # varName
        name = self.get_token()
        self.__writer.push_val(self.find(name))
        return

    def find(self, name):
        """
        finding a variable name in symbol tables
        :param name: the identifier to look up
        :return: the entry from the subroutine table if it has one,
            otherwise whatever the class table returns (a falsy value when
            the name is unknown to both)
        """
        val = self.__subroutine_symbol.get_data(name)
        if not val:
            val = self.__class_symbol.get_data(name)
        return val

    def compile_array(self, kind, index):
        """
        compiling array assignment
        :param kind: var kind
        :param index: var index
        """
        self.eat()  # [
        self.compile_expression()
        self.eat()  # ]
        self.__writer.write_push(kind, index)
        self.__writer.write_arithmetic("add")
        self.eat()  # =
        self.compile_expression()
        # Park the value in temp 0 while pointer 1 is set to the target
        # address computed above, then store it through "that".
        self.__writer.write_pop("temp", 0)
        self.__writer.write_pop("pointer", 1)
        self.__writer.write_push("temp", 0)
        self.__writer.write_pop("that", 0)
Example #58
0
                    index = self.symbolTable.search(token)
                    if index is None:
                        self.symbolTable.add(token)
                    index = self.symbolTable.search(token)
                    self.pif.genPif(token, index)
                else:
                    raise Exception(
                        "There is a Lexical Error detected on line " +
                        str(count) + " for token " + token)
            count += 1


'''
Tests
'''
# First run: the scanner is built without an explicit path argument.
symTable = SymbolTable()
pif = ProgramInternalForm()
scanner = Scanner(symTable, pif)
scanner.scan()
print("symTable: ")
print(symTable)
print("pif: ")
print(pif)

# Second run: fresh table and PIF, scanning the file at a hard-coded
# absolute Windows path.
# NOTE(review): the file name suggests an input containing a lexical error,
# in which case scan() would raise before the prints below -- confirm.
symTable = SymbolTable()
pif = ProgramInternalForm()
scanner = Scanner(symTable, pif,
                  "D:\\Facultate\\Sem 1\\FLCD\\STlab2\\p1er.txt")
scanner.scan()
print("symTable: ")
print(symTable)
Example #59
0
class TypeChecker(object):
    
    def __init__(self):
        self.errorsOcurred=False
        self.classTables={}
        operators = ['+','-','*','/','%','|','&','^','&&','||','<<','>>','==','!=','<','>','<=','>=','f']
        self.types = ['int','float','string']
        self.ttype = dict((key,dict((key,{}) for key in self.types)) for key in operators)
        self.ttype['+']['int']['float'] = 'float'
        self.ttype['+']['float']['int'] = 'float'
        self.ttype['+']['float']['float'] = 'float'
        self.ttype['+']['int']['int'] = 'int'
        self.ttype['+']['string']['string'] = 'string'

        self.ttype['-']['int']['float'] = 'float'
        self.ttype['-']['float']['int'] = 'float'
        self.ttype['-']['float']['float'] = 'float'
        self.ttype['-']['int']['int'] = 'int'

        self.ttype['*']['int']['float'] = 'float'
        self.ttype['*']['float']['int'] = 'float'
        self.ttype['*']['float']['float'] = 'float'
        self.ttype['*']['int']['int'] = 'int'
        
        self.ttype['/']['int']['float'] = 'float'
        self.ttype['/']['float']['int'] = 'float'
        self.ttype['/']['float']['float'] = 'float'
        self.ttype['/']['int']['int'] = 'int'

        self.ttype['%']['int']['int'] = 'int'

        self.ttype['|']['int']['int'] = 'int'

        self.ttype['&']['int']['int'] = 'int'

        self.ttype['^']['int']['int'] = 'int'

        self.ttype['&&']['int']['int'] = 'int'

        self.ttype['||']['int']['int'] = 'int'

        self.ttype['<<']['int']['int'] = 'int'

        self.ttype['>>']['int']['int'] = 'int'

        self.ttype['==']['int']['int'] = 'int'
        self.ttype['==']['int']['float'] = 'int'
        self.ttype['==']['float']['int'] = 'int'
        self.ttype['==']['float']['float'] = 'int'
        self.ttype['==']['string']['string'] = 'int'

        self.ttype['!=']['int']['int'] = 'int'
        self.ttype['!=']['int']['float'] = 'int'
        self.ttype['!=']['float']['int'] = 'int'
        self.ttype['!=']['float']['float'] = 'int'
        self.ttype['!=']['string']['string'] = 'int'

        self.ttype['<']['int']['int'] = 'int'
        self.ttype['<']['int']['float'] = 'int'
        self.ttype['<']['float']['int'] = 'int'
        self.ttype['<']['float']['float'] = 'int'
        self.ttype['<']['string']['string'] = 'int'

        self.ttype['>']['int']['int'] = 'int'
        self.ttype['>']['int']['float'] = 'int'
        self.ttype['>']['float']['int'] = 'int'
        self.ttype['>']['float']['float'] = 'int'
        self.ttype['>']['string']['string'] = 'int'

        self.ttype['<=']['int']['int'] = 'int'
        self.ttype['<=']['int']['float'] = 'int'
        self.ttype['<=']['float']['int'] = 'int'
        self.ttype['<=']['float']['float'] = 'int'
        self.ttype['<=']['string']['string'] = 'int'

        self.ttype['>=']['int']['int'] = 'int'
        self.ttype['>=']['int']['float'] = 'int'
        self.ttype['>=']['float']['int'] = 'int'
        self.ttype['>=']['float']['float'] = 'int'
        self.ttype['>=']['string']['string'] = 'int'
        
        self.ttype['f']['string']['string'] = 'string'
        self.ttype['f']['int']['int'] = 'int'
        self.ttype['f']['float']['float'] = 'float'
        self.ttype['f']['float']['int'] = 'float'
    
    def error(self,text,line):
        self.errorsOcurred=True
        print("********************************")
        print("Error: " + text)
        print("Line " + str(line))
        exc_type, exc_obj, exc_tb = sys.exc_info()
        print(exc_type, exc_tb.tb_lineno)
        print("********************************")

    def visit_Program(self,node):
        """
        Check a whole program.

        A root scope named 'main' is created, then class definitions,
        declarations, function definitions and instructions are visited in
        that order.  Any exception escaping those visits is reported through
        ``self.error`` with line 0 instead of propagating.

        :param node: program node exposing ``classdefs``, ``declarations``,
                     ``fundefs`` and ``instructions``
        """
        try:
            self.symbolTable=SymbolTable(None,'main')
            node.classdefs.accept(self)
            node.declarations.accept(self)
            node.fundefs.accept(self)
            node.instructions.accept(self)
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must still be able to stop the process.
            self.error("could not continue parsing, correct errors first",0)

    def visit_Declarations(self,node):
        # print "visiting Declarations"
        for element in node.list :
            element.accept(self)
    
    def visit_Declaration(self,node):
        # print "visiting Declaration"
        toReturn = []
        declType = node.typeOrId
        allInits = node.initsOrClassinits.accept(self)
        if(declType in self.types):
            for element in allInits:
                [typeOrId,id] = element
                if self.symbolTable.get(id.value) != None:
                    self.error("Symbol: "+id.value+", was previusly declared",id.line)
                try:
                    self.ttype['f'][declType][typeOrId]
                except:
                    self.error("cannot initialize symbol of type: "+declType+", with expression of type: "+typeOrId,id.value)
                self.symbolTable.put(id.value,typeOrId)
                toReturn.append(id.value)
        else:
            typeOrId=declType.accept(self)
            for id in allInits:
                if self.symbolTable.get(id.value) != None:
                    self.error("Symbol: "+id.value+", was previusly declared",id.line)
                self.symbolTable.put(id.value,typeOrId)

                tmp = typeOrId
                while tmp!=None:
                    classTable = self.classTables[tmp.id]
                    for element in classTable.map:
                        self.symbolTable.put(self.makeClassContentName(typeOrId.id,id.value,element), classTable.get(element))
                    tmp=tmp.parentClass
                toReturn.append(id.value)
        return toReturn

    def makeClassContentName(self, className, objectName, fieldName):
        return "__"+className+objectName+fieldName

    def visit_Inits(self,node):
        # print "visiting Inits"
        toReturn=[]
        for element in node.list:
            toReturn.append(element.accept(self))
        return toReturn
    
    def visit_Init(self,node):
        # print "visiting Init"
        return [node.expression.accept(self),node.id]

    def visit_Classinits(self,node):
        # print "visiting Classinits"
        toReturn=[]
        for element in node.list:
            toReturn.append(element.accept(self))
        return toReturn

    def visit_Classinit(self,node):
        # print "visiting Classinit"
        return node.id

    def visit_Instructions(self, node):
        """
        Visit an instruction list inside its own scope.

        A scope named 'instructions' is pushed on top of the current one,
        every element of ``node.list`` is visited, and the current scope is
        then replaced by its parent scope.
        """
        innerScope = SymbolTable(self.symbolTable, 'instructions')
        self.symbolTable = innerScope
        for instruction in node.list:
            instruction.accept(self)
        self.symbolTable = self.symbolTable.getParentScope()
            
    def visit_PrintInstr(self,node):
        # print "visiting PrintInstr"
        if node.expression.accept(self) not in ['string','int','float']:
            self.error("cannot print expression of that type",node.line)
        
    def visit_LabeledInstr(self,node):
        # print "visiting LabeledInstr"
        node.instruction.accept(self)
    
    def visit_Assignment(self,node):
        # print "visiting Assignment"
        try:
            idType = node.access.accept(self)
            exprType = node.expression.accept(self)
            self.ttype['f'][idType][exprType]
        except:
            self.error("cannot assign "+exprType+" to "+idType,node.id.line)
            
    
    def visit_ChoiceInstr(self,node):
        # print "visiting ChoiceInstr"
        node.condition.accept(self)
        node.instruction.accept(self)
        node.elseInstruction.accept(self)
        
    def visit_Break(self,node):
        # print "visiting Break"
        pass
    
    def visit_Continue(self,node):
        # print "visiting Continue"
        pass
    
    def visit_WhileInstr(self, node):
        """
        Check a while loop.

        The condition is visited in the current scope; the loop instruction
        is visited inside a new scope named 'while', after which the current
        scope is replaced by its parent scope.
        """
        node.condition.accept(self)
        loopScope = SymbolTable(self.symbolTable, 'while')
        self.symbolTable = loopScope
        node.instruction.accept(self)
        self.symbolTable = self.symbolTable.getParentScope()
    
    def visit_RepeatInstr(self,node):
        # print "visiting Repeat"
        node.instructions.accept(self)
        node.condition.accept(self)
    
    def visit_ReturnInstr(self,node):
        # print "visiting Return"
        node.expression.accept(self) #todo check somehow
    
    def visit_CompoundInstr(self,node):
        # print "visiting CompoundInstr"
        #self.symbolTable = SymbolTable(self.symbolTable,'compoundInstr')
        node.declarations.accept(self)
        node.instructions.accept(self)
        #self.symbolTable = self.symbolTable.getParentScope()
    
    def visit_Condition(self,node):
        # print "visiting Condition"
        if node.expression.accept(self) not in ('int'):
            self.error("condition must be of int type",node.line)
    
    def visit_Integer(self,node):
        # print "visiting Integer"
        return 'int'
    
    def visit_Float(self,node):
        # print "visiting Float"
        return 'float'
    
    def visit_String(self,node):
        # print "visiting String"
        return 'string'
    
    def visit_Id(self,node):
        # print "visiting Id"
        if self.symbolTable.getIncludingParents(node.value):
            return self.symbolTable.getIncludingParents(node.value)
        self.error("undefined symbol: "+node.value,node.line)
    
    def visit_ParExpr(self,node):
        # print "visiting ParExpr"
        return node.expression.accept(self)   
        
    def visit_BinExpr(self,node):
        operator = node.operator
        first = node.first.accept(self)
        second = node.second.accept(self)            
            
        # print "visiting BinExpr"
        #print first
        #print operator
        #print second
        try:
            return self.ttype[operator][first][second]
        except:
            self.error("cannot compute operation: " +operator+",on arguments: "+first+", "+second,node.first.line)
            
          
    def visit_FunExpr(self,node):
        # print "visiting FunExpr"
        funSymbol = node.access.accept(self)
        for i in range(len(node.expressionList.list)):
            try:
                baseArgType = funSymbol.argList[i]
                givenArgType = node.expressionList.list[i].accept(self)
                self.ttype['f'][baseArgType][givenArgType]
            except:
                self.error("bad argument in funcall",node.line)
        return funSymbol.type
    
    def visit_ExprList(self,node):
        # print "visiting ExprList"
        toReturn = []
        for element in node.list:
            toReturn.append(element.accept(self))
        return toReturn
    
    def visit_FunDefs(self,node):
        # print "visiting FunDefs"
        for element in node.list :
            element.accept(self)
            
    def visit_FunDef(self,node):
        """
        Check a function definition.

        A scope named after the function is pushed, the parameters are
        visited, a ``FunSymbol`` carrying the return type, the name and the
        list of parameter visit results is stored in the parent scope, the
        body is visited and the scope is popped again.

        :param node: node exposing ``id``, ``typeOrId``, ``argList`` and
                     ``compoundInstr``
        :return: the function name
        """
        self.symbolTable = SymbolTable(self.symbolTable,node.id.value)
        # Built eagerly as a real list: on Python 3 ``map`` is lazy, so the
        # parameters would never be visited and the stored argument list
        # could not be indexed by visit_FunExpr.
        argTypes = [arg.accept(self) for arg in node.argList.list]
        self.symbolTable.getParentScope().put(node.id.value,FunSymbol(node.typeOrId, node.id.value, argTypes))
        node.compoundInstr.accept(self)
        self.symbolTable = self.symbolTable.getParentScope()
        return node.id.value
        
    def visit_ArgList(self,node):
        # print "visiting ArgList"
        toReturn = []
        for element in node.list:
            toReturn.append(element.accept(self))
        return toReturn
    
    def visit_Arg(self,node):
        # print "visiting Arg"
        self.symbolTable.put(node.id.value,node.typeOrId)
        return node.typeOrId

    def visit_ClassDefs(self,node):
        # print "visiting ClassDefs"
        for element in node.list :
            element.accept(self)

    def visit_ClassDef(self,node):
        """
        Check a class definition and register it.

        A scope named after the class is created, the class content is
        visited inside it, the scope is remembered in ``self.classTables``
        and a ``ClassSymbol`` for the class is stored in the outermost scope.
        On return ``self.symbolTable`` is that outermost scope.

        :param node: node exposing ``id``, ``parentId`` (None when there is
                     no parent class), ``accessmodificator`` and
                     ``classcontent``
        """
        # print "visiting ClassDef"
        # The new scope hangs below the current scope, or below the parent
        # class' scope when a parent is named; that parent must already be
        # present in self.classTables.
        self.symbolTable = SymbolTable(self.symbolTable if node.parentId == None else self.classTables[node.parentId.value], node.id.value)
        # The parent id and the class content are visited while the new class
        # scope is the current one.
        classSymbol = ClassSymbol(node.accessmodificator, node.id.value, node.parentId if node.parentId == None else node.parentId.accept(self),node.classcontent.accept(self))
        self.classTables[node.id.value]=self.symbolTable
        # Climb to the scope that has no parent and register the class there.
        while self.symbolTable.parent!=None:
            self.symbolTable = self.symbolTable.getParentScope()
        self.symbolTable.put(node.id.value,classSymbol)



    def visit_Access(self, node):
        """
        Resolve an access expression.

        The first element of ``node.list`` is visited.  If the result is not
        a ``ClassSymbol`` it is returned as is.  Otherwise the second element
        names a member: when the member is visible from the current scope and
        an entry for it exists in the symbol table, that entry is returned;
        in every other case an error is reported and the class symbol itself
        is returned.
        """
        target = node.list[0].accept(self)
        if not isinstance(target, ClassSymbol):
            return target

        if not target.hasAccess(self.symbolTable.getFirstRelevantScopeName(), node.list[1].value):
            self.error("trying to access field that is not visible", 0)
            return target

        classContentName = self.makeClassContentName(target.id, node.list[0].value, node.list[1].value)
        if self.symbolTable.getIncludingParents(classContentName):
            return self.symbolTable.getIncludingParents(classContentName)

        self.error("cannot find class content "+classContentName, 0)
        return target

    def visit_Fielddefs(self, node):
        # print "visiting Fielddefs"
        toReturn=[]
        for element in node.list:
            for accessAndId in element.accept(self):
                toReturn.append(accessAndId)
        return toReturn

    def visit_Fielddef(self, node):
        # print "visiting Fielddef"
        toReturn=[]
        for element in node.declaration.accept(self):
            toReturn.append((element,node.accessmodificator))
        return toReturn

    def visit_Classcontent(self, node):
        # print "visiting Classcontent"
        toReturn={}
        for element in node.fielddefs.accept(self):
            toReturn[element[0]]=element[1]
        for element in node.methoddefs.accept(self):
            toReturn[element[0]]=element[1]
        return toReturn

    def visit_Methoddefs(self, node):
        # print "visiting Methoddefs"
        toReturn=[]
        for element in node.list:
            toReturn.append(element.accept(self))
        return toReturn

    def visit_Methoddef(self, node):
        # print "visiting Methoddef"
        return (node.fundef.accept(self), node.accessmodificator)
Example #60
0
def translate_Cinstr_to_bin(c_instr: str) -> str:
    """Assemble the binary text of a C-instruction.

    The result is the literal prefix ``111`` followed by what the
    module-level ``parser`` returns from ``get_comp_bin``, ``get_dest_bin``
    and ``get_jump_bin`` for ``c_instr``, in that order.
    """
    comp_bits = parser.get_comp_bin(c_instr=c_instr)
    dest_bits = parser.get_dest_bin(c_instr=c_instr)
    jump_bits = parser.get_jump_bin(c_instr=c_instr)
    return f"111{comp_bits}{dest_bits}{jump_bits}"


"""
main.py drives the entire
translation process of the
assembler.
"""
parser = Parser(inFile="inFile.asm")
symbolTable = SymbolTable()

#############
# FIRST PHASE
first_pass(parser.file)

#############
# SECOND PASS
outFile = open('outFile.hack', 'w')

while parser.hasMoreCommands():
    # Set 'next_cmd' to 'curr_cmd'
    curr_cmd = parser.advance()
    curr_cmdType = parser.commandType(curr_cmd)

    if curr_cmdType == 'C_COMMAND':