Example #1
0
class Main:
    """Driver that scans ``p1.txt`` and writes the symbol table and the PIF."""

    def __init__(self):
        """Create the symbol table (size 17), the PIF and the scanner."""
        self.st = SymbolTable(17)
        self.pif = PIF()
        self.scanner = Scanner()

    def run(self):
        """Scan ``p1.txt`` line by line and report the lexical verdict.

        Every token is classified as reserved word/separator/operator,
        identifier or constant and recorded in the PIF.  Tokens listed in
        ``self.scanner.cases`` that are followed by a token starting with a
        non-zero digit are split: everything but the last character goes to
        the PIF and the last character is carried over and prepended to the
        next constant.  The symbol table is written to ``symboltable.out``
        and the PIF to ``pif.out``; either "Lexically correct" or the
        collected error messages are printed.
        """

        def lexical_error(bad_token, line_no):
            # Same message layout for both error sites below.
            return ('Lexical error at token ' + bad_token + ', at line ' +
                    str(line_no) + "\n")

        readFile()
        fileName = "p1.txt"
        errors = []

        with open(fileName, 'r') as source:
            for lineCounter, raw_line in enumerate(source, start=1):
                tokens = self.scanner.tokenize(raw_line.strip())
                carried = ''
                for pos, tok in enumerate(tokens):
                    if tok in reservedWords + separators + operators:
                        # Spaces are separators but never reach the PIF.
                        if tok != ' ':
                            self.pif.add(tok, (-1, -1))
                        continue

                    if tok in self.scanner.cases and pos < len(tokens) - 1:
                        if re.match("[1-9]", tokens[pos + 1]):
                            self.pif.add(tok[:-1], (-1, -1))
                            carried = tok[-1]
                        else:
                            errors.append(lexical_error(tok, lineCounter))
                        continue

                    if self.scanner.isIdentifier(tok):
                        position = self.st.add(tok)
                        self.pif.add("id", position)
                        continue

                    if self.scanner.isConstant(tok):
                        position = self.st.add(carried + tok)
                        carried = ''
                        self.pif.add("const", position)
                        continue

                    errors.append(lexical_error(tok, lineCounter))

        with open('symboltable.out', 'w') as writer:
            writer.write(str(self.st))

        with open('pif.out', 'w') as writer:
            writer.write(str(self.pif))

        if not errors:
            print("Lexically correct")
        else:
            print("".join(errors))
Example #2
0
class Scanner:
    def __init__(self, file):
        self._Identifiers = SymbolTable()
        self._Constants = SymbolTable()
        self._PIF = ProgramInternalForm()
        self._filename = file

    def run(self):
        self.Tokenize(self._filename)
        f = open("codif_table.txt", "w")

        for k, v in codification_table.items():
            f.write(str(k) + " " + str(v) + '\n')
        f.close()
        print('Identifiers: \n')
        self._Identifiers.PrintSymbolTable()
        print('Constants: \n')
        self._Constants.PrintSymbolTable()
        print(self._PIF)

    def fillIdentifiers(self, filename):
        line_number = 0
        with open(filename, "r") as file:
            for line in file:
                line_number += 1
                for token in self.GetTokensFromLine(line.strip(), separators):
                    if self.IsIdentifier(
                            token
                    ) and token not in reserved + operators + separators:
                        self._Identifiers.add(token)
        file.close()

    def Tokenize(self, filename):
        self.fillIdentifiers(filename)
        line_number = 0
        with open(filename, "r") as file:
            for line in file:
                line_number += 1
                for token in self.GetTokensFromLine(line.strip(), separators):

                    if token in reserved + operators + separators:
                        if token is not ' ':
                            self._PIF.add(codification_table[token], -1)

                    elif self.IsIdentifier(token):
                        # self._Identifiers.add(token)
                        self._PIF.add(codification_table['identifier'],
                                      self._Identifiers.get(token))

                    elif self.IsConstant(token) or self.IsNegativeNumber(
                            token):
                        self._Constants.add(token)
                        self._PIF.add(codification_table['constant'],
                                      self._Constants.get(token))

                    else:
                        raise Exception("Unknown token " + token +
                                        " at line " + str(line_number))

        file.close()

    def GetTokensFromLine(self, line, listOfSeparators):
        token = ""
        index = 0
        tokens = []
        openBrack = 0
        closeBrack = 0
        x = len(line)
        while index < len(line):
            if line[index] == '"':
                if token:
                    tokens.append(token)
                token, index = self.GetStringFromToken(line, index)
                tokens.append(token)
                token = ''

            elif self.IsOperatorChar(line[index]):
                if token:
                    tokens.append(token)
                token, index = self.GetOperatorFromToken(line, index)
                tokens.append(token)
                token = ''

            elif line[index] in listOfSeparators:
                if token:
                    tokens.append(token)
                if token == '{':
                    openBrack += 1
                if token == '}':
                    closeBrack += 1
                token, index = line[index], index + 1
                tokens.append(token)
                token = ''
            else:
                token += line[index]
                index += 1
        if token:
            tokens.append(token)

        return tokens

    def IsConstant(self, token):
        return re.match(
            '^(0|[1-9][0-9]*)$|^\'[a-zA-Z0-9]\'$|^\"[A-Za-z0-9]+\"$',
            token) is not None

    def IsNegativeNumber(self, token):
        return re.match('^-[1-9][0-9]*$', token) is not None

    def IsIdentifier(self, token):
        return re.match('^[a-z]([a-zA-Z0-9]){,7}$', token) is not None

    def IsOperatorChar(self, char):
        for op in operators:
            if char in op:
                return True
        return False

    def GetOperatorFromToken(self, line, index):
        token = ''
        tokenForNeg = ''
        tokenForNeg += line[index] + line[index + 1]

        if self.IsNegativeNumber(tokenForNeg):
            token += tokenForNeg
            index += 2
            return token, index
        elif line[index] == '=' and line[index] == '-':
            token += line[index]
            index += 1
            return token, index
        # elif self.IsNegativeNumber(tokenForNeg):
        #     token += tokenForNeg
        #     index += 2
        #     return token, index
        while index < len(line) and self.IsOperatorChar(
                line[index]) and self.IsOperatorChar(token + line[index]):
            token += line[index]
            index += 1
        return token, index

    def GetStringFromToken(self, line, index):
        token = ''
        quote_count = 0
        while index < len(line) and quote_count < 2:
            if line[index] == '"':
                quote_count += 1
            token += line[index]
            index += 1
        return token, index
Example #3
0
class Visitor(NodeVisitor):
    """
    Program visitor class. This class uses the visitor pattern. You need to define methods
    of the form visit_NodeName() for each kind of AST node that you want to process.
    Note: You will need to adjust the names of the AST nodes if you picked different names.

    Semantic errors are not raised; they are appended to the module-level
    ``error_vector`` and can be printed with :meth:`print_error`.
    """

    def print_error(self):
        """Print every message collected in ``error_vector``."""
        for message in error_vector:
            print(message)

    def error(self, message):
        """Record *message* (prefixed with ``Error: ``) in ``error_vector``."""
        error_vector.append("Error: " + message)

    def __init__(self):
        # Initialize the symbol table
        self.symtab = SymbolTable()

        # Add built-in type names (int, float, char) to the symbol table
        # self.symtab.add("int", uctype.IntType)
        # self.symtab.add("float",uctype.float_type)
        # self.symtab.add("char",uctype.char_type)

    def visit_Program(self, node):
        """Visit all global declarations inside one enclosing scope."""
        self.symtab.begin_scope()
        for _decl in node.gdecls:
            self.visit(_decl)
        self.symtab.end_scope()

    def visit_GlobalDecl(self, node):
        """Visit each declaration of a global declaration group."""
        for _decl in node.decls:
            self.visit(_decl)

    def _declare_scalar(self, node, type_name):
        """Register ``node.name.name`` with the scalar uctype for *type_name*.

        Records an "invalid type" error when *type_name* is not one of
        ``int``, ``float``, ``char`` or ``void``.
        """
        if type_name == 'int':
            self.symtab.add(node.name.name, uctype.IntType)
        elif type_name == 'float':
            self.symtab.add(node.name.name, uctype.FloatType)
        elif type_name == 'char':
            self.symtab.add(node.name.name, uctype.CharType)
        elif type_name == 'void':
            self.symtab.add(node.name.name, uctype.VoidType)
        else:
            self.error("invalid type")

    def visit_Decl(self, node):
        """Add the declared name to the symbol table and check its initializer.

        Arrays are registered with the matching array type, functions and
        plain variables with the scalar type.  Only plain variables have
        their initializer visited and compared against the declared type.
        """
        type_name = self.visit(node.type)
        if isinstance(node.type, ast.ArrayDecl):
            # NOTE(review): an unrecognised element type is ignored here
            # without an error, unlike the scalar branches — confirm intent.
            if type_name == 'int':
                self.symtab.add(node.name.name, uctype.IntArrayType)
            elif type_name == 'float':
                self.symtab.add(node.name.name, uctype.FloatArrayType)
            elif type_name == 'char':
                self.symtab.add(node.name.name, uctype.CharArrayType)
        elif isinstance(node.type, ast.FuncDecl):
            self._declare_scalar(node, type_name)
        else:
            self._declare_scalar(node, type_name)

            init = self.visit(node.init)
            if init is not None and init != type_name:
                self.error("initializer mismatch")

        # TODO: validate array initializers element by element and the
        # function/variable type associations via self.symtab.lookup.

    def visit_VarDecl(self, node):
        """Return the declared type name, or record an error if it is invalid.

        :return: ``'int'``, ``'char'``, ``'float'`` or ``'void'``; ``None``
            after an invalid type has been reported.
        """
        self.visit(node.declname)
        type_name = self.visit(node.type)
        if type_name in ("int", "char", "float", "void"):
            return type_name
        # The visit_* methods of this class return type names or None, not
        # nodes, so the variable name is read from the AST node itself;
        # reading ``.name`` from the visit result raised AttributeError.
        self.error("variable {} has an invalid type : ".format(
            getattr(node.declname, "name", None)))

    def visit_Type(self, node):
        """Return the first name of the type node."""
        return node.names[0]

    def visit_BinaryOp(self, node):
        """Type-check a binary operation.

        1. Make sure left and right operands have the same type
        2. Make sure the operation is supported
        3. Assign the result type

        :return: the left operand type, or ``None`` when the operator is not
            supported for that type.
        """
        left_type = self.visit(node.left)
        right_type = self.visit(node.right)
        if left_type == right_type:
            if node.op not in uctype.constant_type(left_type).binary_ops:
                self.error("binary operation not supported")
                return None
        else:
            self.error("type mismatch on binary operation")

        return left_type

    def visit_Assignment(self, node):
        """Visit both sides of the assignment."""
        # TODO: TYPECHECKING
        # 1. Make sure the location of the assignment is defined
        # 2. Check that the types of both sides match
        self.visit(node.lvalue)
        self.visit(node.rvalue)

    def visit_ID(self, node):
        """Return the type name registered for the identifier, or ``None``."""
        symbol_type = self.symtab.lookup(node.name)
        if symbol_type is None:
            return None
        return symbol_type.typename

    def visit_NoneType(self, node):
        """Absent child node: nothing to do."""
        pass

    def visit_Cast(self, node):
        """Visit the target type and the expression being cast."""
        self.visit(node.new_type)
        self.visit(node.expr)

    def visit_Constant(self, node):
        """Return the type stored on the constant node."""
        return node.type

    def visit_Break(self, node):
        """Nothing to check."""
        pass

    def visit_Assert(self, node):
        """Visit the asserted expression."""
        self.visit(node.expr)

    def visit_Print(self, node):
        """Nothing is checked for print statements."""
        pass

    def visit_Read(self, node):
        """Visit the expression being read into."""
        self.visit(node.expr)

    def visit_If(self, node):
        """Visit condition and both branches inside a new scope."""
        self.symtab.begin_scope()
        self.visit(node.cond)
        self.visit(node.iftrue)
        self.visit(node.iffalse)
        self.symtab.end_scope()

    def visit_FuncDef(self, node):
        """Visit the declaration, the body and then any parameter declarations."""
        self.visit(node.decl)
        self.visit(node.body)
        if node.param_decls is not None:
            for _decl in node.param_decls:
                self.visit(_decl)

    def visit_While(self, node):
        """Visit condition and statement inside a new scope."""
        self.symtab.begin_scope()
        self.visit(node.cond)
        self.visit(node.statement)
        self.symtab.end_scope()

    def visit_Compound(self, node):
        """Visit every block item inside a new scope."""
        self.symtab.begin_scope()
        for _decl in node.block_items:
            self.visit(_decl)
        self.symtab.end_scope()

    def visit_DeclList(self, node):
        """Visit each declaration of the list."""
        for _decl in node.decls:
            self.visit(_decl)

    def visit_For(self, node):
        """Visit initializer, condition, step and statement inside a new scope."""
        self.symtab.begin_scope()
        self.visit(node.initial)
        self.visit(node.cond)
        self.visit(node.next)
        self.visit(node.statement)
        self.symtab.end_scope()

    def visit_EmptyStatement(self, node):
        """Nothing to check."""
        pass

    def visit_Return(self, node):
        """Visit the returned expression."""
        self.visit(node.expr)

    def visit_UnaryOp(self, node):
        """Visit the operand."""
        self.visit(node.expr)

    def visit_ExprList(self, node):
        """Visit each expression of the list."""
        for _decl in node.exprs:
            self.visit(_decl)

    def visit_FuncCall(self, node):
        """Visit the callee name and the argument list."""
        self.visit(node.name)
        self.visit(node.args)

    def visit_InitList(self, node):
        """Visit each initializer expression."""
        for _decl in node.exprs:
            self.visit(_decl)

    def visit_ParamList(self, node):
        """Visit each parameter."""
        for _decl in node.params:
            self.visit(_decl)

    def visit_FuncDecl(self, node):
        """Visit the arguments and return the result of visiting the type."""
        self.visit(node.args)
        return self.visit(node.type)

    def visit_ArrayDecl(self, node):
        """Visit element type and dimension; return the element type result."""
        element_type = self.visit(node.type)
        self.visit(node.dim)
        return element_type
Example #4
0
    constantSymbolTable = SymbolTable()
    HashTable = HashTable()
    PIF = ProgramInternalForm()

    with open(filename, 'r') as file:
        noOfLine = 0

        for line in file:
            noOfLine += 1
            # generate token for the whole lines except the last one
            for token in generateToken(line[0:-1], separators):
                if token in separators + operators + reservedWords:
                    PIF.add(codification[token], -1)

                elif isIdentifier(token):
                    pos = identifierSymbolTable.add(token)
                    PIF.add(codification['identifier'], HashTable.hashCode(token))

                elif isConstant(token):
                    pos = constantSymbolTable.add(token)
                    PIF.add(codification['constant'], HashTable.hashCode(token))

                else:
                    raise Exception('Unknown token ' + token + ' at line ' + str(noOfLine))

        with open('D:\\An 3\\Sem 1\\LFTC\\Lab1-lexicalAnalyzer\\outputs\\PIF.txt', 'w') as f:
            f.write('The Program Internal Form:\n')
            for e in PIF.getPIF():
                f.write("%s\n" % str(e))

        f.close()
Example #5
0
for line in lines:
    counter += 1
    tokens = tokenize(line[0:-1], separators)
    i = 0
    while i in range(len(tokens)):
        if tokens[i] in toEncode:
            if tokens[i] == ' ':
                i += 1
            elif tokens[i] != '+' and tokens[i] != '-':
                pif.add(codification[tokens[i]], -1)
                i += 1
            elif (tokens[i] == '-' or tokens[i] == '+') and isIntConstant(
                    tokens[i + 1]) and tokens[i - 1] in toEncode:
                pos = symbolTable.get(tokens[i] + tokens[i + 1])
                if pos is None:
                    pos = symbolTable.add(int(tokens[i + 1]),
                                          tokens[i] + tokens[i + 1])
                pif.add(codification['constant'], pos)
                i += 2
            elif (tokens[i] == '-' or tokens[i] == '+') and isIdentifier(
                    tokens[i + 1]) and tokens[i - 1] in toEncode:
                pos = symbolTable.get(tokens[i] + tokens[i + 1])
                if pos is None:
                    pos = symbolTable.add(len(tokens[i + 1]),
                                          tokens[i] + tokens[i + 1])
                pif.add(codification['constant'], pos)
                i += 2
            else:
                pif.add(codification[tokens[i]], -1)
                i += 1
        elif isIdentifier(tokens[i]):
            pos = symbolTable.get(tokens[i])
Example #6
0
        dict[l[0]] = l[1][:-1]
    return dict


if __name__ == '__main__':
    # Classify every whitespace-separated word of input.txt.
    #
    # The previous loop never terminated (``file`` is never None and
    # ``read(1)`` returns '' forever at end of file), threw away every other
    # character by calling ``read(1)`` twice per step, and discarded the
    # result of ``str.join`` so the candidate token was always empty.
    token = generate_dict("specification.in")
    st = SymbolTable()
    pif = PIF()
    line_index = 0

    actual_string = ""

    # Read the program once; the context manager closes the file.
    # NOTE(review): words are split on any whitespace (spaces, tabs,
    # newlines) — confirm that matches the intended token delimiter.
    with open('input.txt', 'r') as file:
        words = file.read().split()

    for actual_string in words:
        if Scanner.is_identifier(actual_string):
            pif.insert(0, actual_string)
        elif Scanner.is_constant(actual_string):
            pif.insert(1, actual_string)
        elif actual_string in token:
            st.add(actual_string)
            pos = st.search(actual_string)
            pif.insert(pos, actual_string)
        else:
            print("Unknown token '{}'!".format(actual_string))
    actual_string = ""

Example #7
0
            print([token for token in tokenGenerator(line,separators)])

    symbolTable = SymbolTable()
    pif = ProgramInternalForm()

    with open(fileName, 'r') as file:
        lineNo = 0
        print ("\n")
        for line in file:
            lineNo += 1
            for token in tokenGenerator(line[0:-1], separators):
                if token in separators + operators + reservedWords:
                    pif.add(codification[token], -1)
                elif isIdentifier(token):
                    print ('Identif - \t' + token)
                    id = symbolTable.add(token)
                    pif.add(codification['identifier'], id)
                elif isConstant(token):
                    print ('Const - \t' + token)
                    id = symbolTable.add(token)
                    pif.add(codification['constant'], id)
                else:
                    raise Exception('Unknown token ' + token + ' at line ' + str(lineNo))

    print('\nProgram internal form:\n', pif)

    print('\nSymbol table:\n', symbolTable)

    print('\n\nCodification table:\n')

    for e in codification:
Example #8
0
class HackAssembler:
    """Two-pass assembler that turns a ``.asm`` file into a ``.hack`` file.

    Constructing the object runs the whole translation: the first pass
    collects label declarations, the second pass translates instructions and
    writes the output file next to the input.
    """

    def __init__(self, file_name):
        """Read *file_name* and assemble it immediately.

        :param file_name: path of the assembly source; the output path is
            the text before the first ``.asm`` plus ``.hack``.
        """
        self._parser = Parser()  # Parser object
        self._code = Code()  # Code object
        self._symbol_table = SymbolTable()  # SymbolTable object
        # The context manager closes the source even if reading fails.
        with open(file_name, 'r') as asm_file:
            # list with the assembly instructions, one entry per line
            self._instructions = asm_file.read().split('\n')
        # name of the output hack file
        self._hack_file_name = file_name.split('.asm')[0] + '.hack'
        # list for the output hack instructions
        self._machine_code = []
        self._first_pass()
        self._second_pass()

    @staticmethod
    def _strip_whitespace(line):
        """Return *line* with every whitespace character removed.

        Tabs and carriage returns are dropped as well as spaces, so an
        indented ``@value`` is still recognised as an A-instruction.
        """
        return ''.join(line.split())

    # iterate to search for label declarations and put this labels in the symbol table
    def _first_pass(self):
        """Record every ``(LABEL)`` with the index of the next real instruction."""
        pc = 0  # program counter
        for line in self._instructions:
            # increase the pc only if the pass find a real instruction
            if self._is_instruction(line):
                pc += 1
            elif self._is_label_declaration(line):
                # extract the label name from the instruction line
                label = self._remove_comment(line).replace('(', '').replace(
                    ')', '')
                # add the label to the symbol table
                self._symbol_table.add(label, pc)
        # now that the symbol table has all labels, we set the symbol table to the Code object
        self._code.set_symbol_table(self._symbol_table)

    # iterate and translate each assembly instruction to hack machine code
    def _second_pass(self):
        """Translate every instruction and write the result to the hack file."""
        for line in self._instructions:
            # translate only real instructions
            if not self._is_instruction(line):
                continue
            instruction = self._remove_comment(line)
            if self._is_c_instruction(instruction):  # translate a C-Instruction
                comp, dest, jump = self._translate_c_instruction(instruction)
                out = '111' + comp + dest + jump
            else:  # translate a A-Instruction
                value = self._translate_a_instruction(instruction)
                out = '0' + value  # op_code + value
            self._machine_code.append(out + '\n')
        # Binding to self._hack_file keeps the attribute available as before;
        # the context manager closes it even if writing fails.
        with open(self._hack_file_name, 'w') as self._hack_file:
            self._hack_file.writelines(self._machine_code)

    # receive an assembly A-Instruction and return the 15-bits address
    def _translate_a_instruction(self, instruction):
        """Return the value bits for an A-instruction."""
        self._parser.set_a_instruction(instruction)
        v = self._parser.value()
        return self._code.value(v)

    # receive an assembly C-Instruction and return a tuple with the 3 layers of machine language
    def _translate_c_instruction(self, instruction):
        """Return the ``(comp, dest, jump)`` bit strings for a C-instruction."""
        self._parser.set_c_instruction(instruction)
        c = self._parser.comp()
        d = self._parser.dest()
        j = self._parser.jump()
        cc = self._code.comp(c)
        dd = self._code.dest(d)
        jj = self._code.jump(j)
        return cc, dd, jj

    def _is_label_declaration(self, line):
        """Return True when the line, whitespace removed, starts with ``(``."""
        instruction = self._strip_whitespace(line)
        return instruction != '' and instruction[0] == '('

    # return True if the current line is a assembly instruction
    # return False if the current line is a blank line or a comment line
    def _is_instruction(self, line):
        """Return True for lines that are neither blank, comment nor label."""
        instruction = self._strip_whitespace(line)
        return instruction != '' and not instruction.startswith(('//', '('))

    # remove the in-line comments of a assembly instruction
    def _remove_comment(self, line):
        """Return the line without whitespace and without a trailing ``//`` comment."""
        return self._strip_whitespace(line).split('//')[0]

    # return True if the instruction argument is a C-Instruction
    # return False if the instruction argument is a A-Instruction
    def _is_c_instruction(self, instruction):
        """Return True unless the instruction starts with ``@``."""
        return instruction[0] != '@'
Example #9
0
class Scanner:
    """Lexical scanner producing a symbol table and a program internal form.

    The list of language tokens is read from ``tokens.txt`` and the program
    text from the file given to the constructor.
    """

    _LETTERS = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM"
    _DIGITS = "1234567890"
    _WORD_CHARS = _LETTERS + _DIGITS + "_"

    def __init__(self, problem):
        """Load the token list and the program text.

        :param problem: path of the program to scan.
        """
        self.__symbolTable = SymbolTable(10)
        self.__programInternalForm = []
        self.__listOfTokens = []
        self.readFromFile("tokens.txt")
        self.__input = ""
        self.readProgram(problem)

    def readFromFile(self, fileName):
        """Append every line of *fileName* to the list of language tokens."""
        with open(fileName, "r") as f:
            for token in f:
                # Strip only the line terminator: slicing off the last
                # character would cut a real character from a final line
                # that has no trailing newline.
                self.__listOfTokens.append(token.rstrip("\n"))

    def readProgram(self, fileName):
        """Append the program lines to the input, each preceded by a space."""
        with open(fileName, "r") as f:
            for instruction in f:
                self.__input += " " + instruction.rstrip("\n")

    def printInput(self):
        """Print the loaded program text."""
        print(self.__input)

    def isValidIdentifier(self, token):
        """Return True for a letter followed by letters, digits or ``_``."""
        if not token or token[0] not in self._LETTERS:
            return False
        return all(char in self._WORD_CHARS for char in token[1:])

    def isValidConstantInt(self, token):
        """Return True for ``0`` or a digit string without a leading zero."""
        if token == "0":
            # The single digit zero is a valid constant; only *leading*
            # zeros are rejected.
            return True
        if not token or token[0] == "0":
            return False
        return all(char in self._DIGITS for char in token)

    def isValidConstantString(self, token):
        """Return True for letters, digits or ``_`` wrapped in single quotes."""
        # Two quote characters are required; a lone quote is not a string.
        if len(token) < 2 or token[0] != "'" or token[-1] != "'":
            return False
        return all(char in self._WORD_CHARS for char in token[1:-1])

    def clasifyToken(self, t):
        """Classify one token and append its entry to the PIF.

        Blank tokens are ignored.  Language tokens are stored with position
        ``-1``, identifiers with their symbol-table position and constants
        with their own text.

        :raises Exception: when the token fits none of the categories.
        """
        token = t.strip(" ")
        if token == '':
            return
        if token in self.__listOfTokens:
            self.__programInternalForm.append([token, -1])
        elif self.isValidIdentifier(token):
            position = self.__symbolTable.findValue(token)
            if position == -1:
                self.__symbolTable.add(token)
                # Look the identifier up again so the PIF stores its real
                # position instead of the "not found" marker -1.
                position = self.__symbolTable.findValue(token)
            self.__programInternalForm.append(["id", position])
        elif self.isValidConstantInt(token):
            self.__programInternalForm.append(["const", token])
        elif self.isValidConstantString(token):
            self.__programInternalForm.append(["const", token])
        else:
            raise Exception("Lexical error found! Invalid token '" + token +
                            "'")

    def isSeparator(self, char):
        """Return True when *char* is one of the separator characters."""
        return char in ["(", ")", "[", "]", " ", ";", '"', ":", "\n"]

    def isOperator(self, char):
        """Return True when *char* is one of the operator symbols or words."""
        return char in [
            "+", "-", "*", "/", "%", "<=", ">=", "==", "=", "<", ">", "and",
            "or", "not"
        ]

    def splitInputSeparators(self):
        """Split the input at separators, keeping the separators.

        :return: list alternating text pieces (possibly empty) and separator
            characters, in source order.
        """
        separatedInput = []
        start = 0
        for i, char in enumerate(self.__input):
            if self.isSeparator(char):
                separatedInput.append(self.__input[start:i])
                separatedInput.append(char)
                start = i + 1
        # Keep the text after the last separator; it used to be dropped when
        # the program did not end with a separator.
        if start < len(self.__input):
            separatedInput.append(self.__input[start:])
        return separatedInput

    def splitInputOperators(self):
        """Split every separator-delimited piece at single-character operators.

        :return: list of operands, operators and separators in source order;
            empty strings may appear and are skipped by ``clasifyToken``.
        """
        allTokens = []
        for word in self.splitInputSeparators():
            start = 0
            for i, char in enumerate(word):
                if self.isOperator(char):
                    allTokens.append(word[start:i])
                    allTokens.append(char)
                    # Tracking the start of the next operand (rather than
                    # the last operator index) handles an operator at
                    # position 0, which used to duplicate the whole word.
                    start = i + 1
            allTokens.append(word[start:])
        return allTokens

    def scan(self):
        """Classify every token of the program and print the results."""
        tokensInProgram = self.splitInputOperators()
        for token in tokensInProgram:
            self.clasifyToken(token)
        print("\nThe program has been scanned!\n")
        self.printOutput()

    def printTokens(self):
        """Print the list of language tokens."""
        print(self.__listOfTokens)

    def printOutput(self):
        """Print the symbol table followed by the PIF."""
        print("Symbol Table: ")
        print(self.__symbolTable)
        print("PIF: ")
        print(self.__programInternalForm)
Example #10
0
File: main.py Project: polk15/FLCD
if __name__ == '__main__':
    # Scan the program in file 'p1' and build its symbol table and PIF.
    st = SymbolTable()
    pif = ProgramInternalForm()

    line_index = 0
    # The context manager closes the source file even if scanning raises;
    # the handle used to stay open for the rest of the run.
    with open('p1', 'r') as file:
        for line_index, line in enumerate(file, start=1):
            line = line.rstrip('\n')  # Skip \n

            for token in Scanner.get_tokens_from_line(line):
                if token == ' ':
                    continue
                if token in operators_separator_words:
                    pif.add(codes[token], -1)
                elif Scanner.is_identifier(token):
                    # Add and return id or just return id
                    pif.add(codes['identifier'], st.add(token))
                elif Scanner.is_constant(token):
                    # Add and return id or just return id
                    pif.add(codes['constant'], st.add(token))
                else:
                    print(f"Unknown token '{token}' at line {line_index}!")

    print("Symbol table: ")
    st.print()
    print(f"Program internal form: {pif}")
class CompilationEngine:
    all_operators = {
        "+": "add",
        "-": "sub",
        "/": "div",
        "*": "mul",
        "&amp;": "and",
        "|": "or",
        "&gt;": "gt",
        "&lt;": "lt",
        "=": "eq"
    }

    def __init__(self, tokens, out_file):
        """
        initializing a new compile engine object and immediately compiling
        the whole class described by the tokens
        :param tokens: the list of tokens created by the tokenizer; the rest
        of the class indexes each token as token[0] (type) and token[1] (text)
        :param out_file: the output file, handed to the VMWriter.
        """
        self.__tokens = tokens
        self.__file = out_file
        self.__i = 0  # index of the next token to be eaten
        self.__class_symbol = SymbolTable()
        self.__subroutine_symbol = SymbolTable()
        self.__cur_token = ()
        self.__class_name = ""
        self.__writer = VMWriter(out_file)
        self.__label_count = 0  # counter used to generate unique labels
        try:
            self.compile_class()
        finally:
            # Close the writer even when compilation fails, so the output
            # is not left open by a malformed token stream.
            self.__writer.close()

    def eat(self):
        """
        compiling a single token and move to the next one
        """
        self.__cur_token = self.__tokens[self.__i]
        self.__i += 1

    def get_token(self):
        return self.__cur_token[1]

    def peek(self):
        """
        checking the current token without compiling
        :return: the token
        """
        ret_val = self.__tokens[self.__i]
        return ret_val[1]

    def peek_type(self):
        """
        checking the current token type without compiling
        :return: the token type
        """
        ret_val = self.__tokens[self.__i]
        return ret_val[0]

    def peek_ll2(self):
        """
        checking two tokens ahead without compiling
        :return: the token
        """
        ret_val = self.__tokens[self.__i + 1]
        return ret_val[1]

    def compile_while_stat(self):  # i points to while
        """
        compiling while statement
        """
        self.eat()
        self.eat()
        label_true = "L%s" % self.__label_count
        self.__label_count += 1
        label_continue = "L%s" % self.__label_count
        self.__label_count += 1
        self.__writer.write_label(label_true)
        self.compile_expression()
        self.__writer.write_arithmetic("not")
        self.__writer.write_if(label_continue)
        self.eat()
        self.eat()
        self.compile_statements()
        self.__writer.write_go_to(label_true)
        self.eat()
        self.__writer.write_label(label_continue)

    def compile_return_stat(self):  # i points to return
        """
        compiling return statement
        """
        self.eat()
        if not self.peek() == ";":
            self.compile_expression()
        else:
            self.__writer.write_push("constant", 0)
        self.__writer.write_return()
        self.eat()

    def compile_do_stat(self):
        """
        compiling do statement: the subroutine call is compiled and then one
        value is popped into temp 0 (presumably to discard the call's
        return value)
        """
        self.eat()  # consumes one token (presumably the 'do' keyword)
        self.compile_subroutine_call()
        self.__writer.write_pop("temp", 0)
        self.eat()  # consumes one token (presumably the closing ';')

    def compile_if_stat(self):
        """
        compiling if statement
        """
        self.eat()
        self.eat()
        self.compile_expression()
        self.__writer.write_arithmetic("not")
        label_false = "L%s" % self.__label_count
        self.__label_count += 1
        label_continue = "L%s" % self.__label_count
        self.__label_count += 1
        self.__writer.write_if(label_false)
        self.eat()
        self.eat()
        self.compile_statements()
        self.__writer.write_go_to(label_continue)
        self.eat()
        self.__writer.write_label(label_false)
        if self.peek() == "else":
            self.eat()
            self.eat()
            self.compile_statements()
            self.eat()
        self.__writer.write_label(label_continue)

    def compile_class_var_dec(self):
        """
        compiling class variable declaration
        """
        self.eat()
        kind = self.get_token()
        if kind == "var":
            kind = SymbolTable.VAR
        self.var_dec_helper(kind, self.__class_symbol)

    def compile_var_dec(self):
        """
        compiling variable declaration: the declared names are stored in the
        subroutine symbol table with kind SymbolTable.VAR
        """
        self.eat()  # consumes one token (presumably the 'var' keyword)
        self.var_dec_helper(SymbolTable.VAR, self.__subroutine_symbol)

    def var_dec_helper(self, kind, symbol_table):

        self.eat()
        type = self.get_token()
        self.eat()
        name = self.get_token()
        symbol_table.add(name, type, kind)
        cur_stat = self.peek()
        while cur_stat != ";":
            self.eat()
            self.eat()
            name = self.get_token()
            symbol_table.add(name, type, kind)
            cur_stat = self.peek()
        self.eat()

    def compile_subroutine_body(self, func_name, func_type):
        """
        compiling subroutine body: local declarations first, then the
        function header, the setup of pointer 0 for methods and
        constructors, and finally the statements
        :param func_name: the name written in the function header
        :param func_type: "method" and "constructor" get extra setup code;
        any other value gets none
        """
        self.eat()  # consumes one token (presumably the opening '{')
        cur_stat = self.peek()
        while cur_stat == "var":
            self.compile_var_dec()
            cur_stat = self.peek()
        # The header is written only after all declarations were read,
        # because it needs the number of VAR entries.
        self.__writer.write_function(
            func_name, self.__subroutine_symbol.var_count(SymbolTable.VAR))
        self.__subroutine_symbol.add("this", self.__class_name, "pointer")
        if func_type == "method":
            # Copy argument 0 into pointer 0.
            self.__writer.write_push(SymbolTable.ARG, 0)
            self.__writer.write_pop("pointer", 0)

        elif func_type == "constructor":
            # Call Memory.alloc with the number of FIELD entries of the
            # class and store the result in pointer 0.
            self.__writer.write_push(
                "constant", self.__class_symbol.var_count(SymbolTable.FIELD))
            self.__writer.write_call("Memory.alloc", 1)
            self.__writer.write_pop("pointer", 0)
        self.compile_statements()
        self.eat()  # consumes one token (presumably the closing '}')

    def compile_parameter_list(self):
        """
        compiling parameters list
        """
        cur_stat = self.peek()
        if cur_stat != ")":
            self.eat()
            type = self.get_token()
            self.eat()
            name = self.get_token()
            self.__subroutine_symbol.add(name, type, SymbolTable.ARG)
            cur_stat = self.peek()

        while cur_stat == ",":
            self.eat()
            self.eat()
            type = self.get_token()
            self.eat()
            name = self.get_token()
            self.__subroutine_symbol.add(name, type, SymbolTable.ARG)
            cur_stat = self.peek()

    def compile_class(self):
        """
        compiling class
        """
        self.eat()
        self.eat()
        self.__class_name = self.get_token()
        self.eat()
        cur_stat = self.peek()

        while cur_stat == "static" or cur_stat == "field":
            self.compile_class_var_dec()
            cur_stat = self.peek()

        while cur_stat != "}":
            self.compile_subroutine_dec()
            cur_stat = self.peek()
        self.eat()

    def compile_expression(self):
        """
        compiling expression
        """
        self.compile_term()
        cur_stat = self.peek()
        while cur_stat in CompilationEngine.all_operators.keys():
            self.eat()
            self.compile_term()
            self.compile_operation(cur_stat)
            cur_stat = self.peek()

    def compile_operation(self, op):
        """
        compiling operation
        :param op: current op
        """
        if op == "*":
            self.__writer.write_call("Math.multiply", 2)

        elif op == "/":
            self.__writer.write_call("Math.divide", 2)

        else:
            self.__writer.write_arithmetic(CompilationEngine.all_operators[op])

    def compile_statements(self):
        """
        compiling statements
        """
        while self.compile_statement():
            continue

    def compile_subroutine_call(self):
        """
        compiling subroutine call. Two shapes are handled:
        "name(...)" is written as a call to <class name>.name with pointer 0
        pushed as an extra first argument; anything else is treated as
        "name.sub(...)", where a name known to the symbol tables has its
        value pushed as an extra first argument
        """
        self.eat()
        name = self.get_token()
        cur_stat = self.peek()
        if cur_stat == "(":
            self.eat()
            # Pointer 0 is passed as the first argument, hence args + 1.
            self.__writer.write_push("pointer", 0)
            args = self.compile_expression_list()
            self.eat()
            self.__writer.write_call(self.__class_name + "." + name, args + 1)
        else:
            self.eat()  # consumes one token (presumably the '.')
            val = self.find(name)
            self.eat()
            var_name = self.get_token()  # the subroutine name after the '.'
            self.eat()
            if not val:
                # Unknown to both symbol tables: the name itself is used as
                # the call prefix and no extra argument is pushed.
                args = 0
            else:
                # Known variable: push it as the first argument and use
                # val[0] (presumably its declared type) as the call prefix.
                self.__writer.push_val(val)
                name = val[0]
                args = 1

            args += self.compile_expression_list()
            self.__writer.write_call(name + "." + var_name, args)
            self.eat()

    def compile_expression_list(self):
        """
        compiling expression list
        """
        args = 0
        cur_stat = self.peek()
        if cur_stat != ")":
            self.compile_expression()
            args += 1
            cur_stat = self.peek()

        while cur_stat == ",":
            self.eat()
            args += 1
            self.compile_expression()
            cur_stat = self.peek()

        return args

    def compile_statement(self):
        """
        compiling statement
        """
        cur_stat = self.peek()
        if cur_stat == "if":
            self.compile_if_stat()
        elif cur_stat == "while":
            self.compile_while_stat()
        elif cur_stat == "do":
            self.compile_do_stat()
        elif cur_stat == "return":
            self.compile_return_stat()
        elif cur_stat == "let":
            self.compile_let_stat()
        else:
            return 0  # when there is no more statements to compile
        return 1

    def compile_let_stat(self):
        """
        compiling let statement
        """
        self.eat()
        self.eat()
        name = self.get_token()
        data = self.find(name)
        kind = data[1]
        ind = data[2]

        if kind == "field":
            kind = "this"

        cur_stat = self.peek()
        if cur_stat == "[":
            self.compile_array(kind, ind)
        else:
            self.eat()
            self.compile_expression()
            self.__writer.write_pop(kind, ind)
        self.eat()  # eat ;

    def compile_subroutine_dec(self):
        """
        compiling subroutine declaration: reads the subroutine kind and
        name, registers the parameters, compiles the body, and then starts a
        fresh subroutine symbol table for the next declaration
        """
        self.eat()
        func_type = self.get_token()  # compared against "method" below
        self.eat()  # consumes one token (presumably the return type)
        self.eat()
        # The compiled name is "<class name>.<subroutine name>".
        func_name = self.__class_name + "." + self.get_token()
        self.eat()  # consumes one token (presumably the opening '(')
        if func_type == "method":
            # Methods register "this" as an ARG entry before their declared
            # parameters.
            self.__subroutine_symbol.add("this", self.__class_name,
                                         SymbolTable.ARG)
        self.compile_parameter_list()
        self.eat()  # consumes one token (presumably the closing ')')
        self.compile_subroutine_body(func_name, func_type)
        self.__subroutine_symbol = SymbolTable()

    def compile_term(self):
        """
        compiling term. The cases are tried in this order: integer constant,
        keyword constant, string constant, parenthesised expression, unary
        "-" and "~", array access, subroutine call, and finally a plain
        variable name
        """
        cur_stat = self.peek_type()
        if cur_stat == JackTokenizer.INT_CONST:
            self.__writer.write_push("constant", self.peek())
            self.eat()
            return

        if cur_stat == JackTokenizer.KEYWORD:
            if self.peek() == "null" or self.peek() == "false":
                self.__writer.write_push("constant", 0)

            elif self.peek() == "true":
                # true is written as "not" applied to constant 0.
                self.__writer.write_push("constant", 0)
                self.__writer.write_arithmetic("not")

            elif self.peek() == "this":
                self.__writer.write_push("pointer", 0)

            # Any other keyword is consumed without emitting code.
            self.eat()
            return

        if cur_stat == JackTokenizer.STR_CONST:
            # Tab, newline, carriage-return and backspace characters are
            # replaced by their two-character backslash spellings before
            # the string is emitted.
            string1 = self.peek().replace('\t', "\\t")
            string2 = string1.replace('\n', "\\n")
            string3 = string2.replace('\r', "\\r")
            string = string3.replace('\b', "\\b")
            # Create the string with its length, then append it character
            # by character.
            self.__writer.write_push("constant", len(string))
            self.__writer.write_call("String.new", 1)
            for ch in string:
                self.__writer.write_push("constant", ord(ch))
                self.__writer.write_call("String.appendChar", 2)
            self.eat()
            return

        # From here on the decision is made on the token text, not its type.
        cur_stat = self.peek()
        if cur_stat == "(":
            self.eat()
            self.compile_expression()
            self.eat()  # consumes one token (presumably the closing ')')
            return

        if cur_stat == "-":
            # Here "-" is unary: compile the operand, then negate it.
            self.eat()
            self.compile_term()
            self.__writer.write_arithmetic("neg")
            return

        if cur_stat == "~":
            self.eat()
            self.compile_term()
            self.__writer.write_arithmetic("not")
            return

        # The token after the next one tells array access, subroutine call
        # and plain variable apart.
        cur_stat = self.peek_ll2()
        if cur_stat == "[":
            self.eat()
            name = self.get_token()
            self.__writer.push_val(self.find(name))
            self.eat()  # consumes one token (the '[' seen by peek_ll2)
            self.compile_expression()
            # Add the pushed variable value and the index, store the sum in
            # pointer 1, and read the element through "that 0".
            self.__writer.write_arithmetic("add")
            self.__writer.write_pop("pointer", 1)
            self.__writer.write_push("that", 0)
            self.eat()  # consumes one token (presumably the closing ']')
            return

        if cur_stat == "." or cur_stat == "(":
            self.compile_subroutine_call()
            return

        self.eat()  # varName
        name = self.get_token()
        self.__writer.push_val(self.find(name))
        return

    def find(self, name):
        """
        finding a variable name in symbol tables
        """
        val = self.__subroutine_symbol.get_data(name)
        if not val:
            val = self.__class_symbol.get_data(name)
        elif not val:
            return False
        return val

    def compile_array(self, kind, index):
        """
        compiling array assignment: the index expression is added to the
        array variable's value, the right-hand expression is compiled, and
        its result is stored through "that 0"
        :param kind: var kind
        :param index: var index
        """
        self.eat()  # consumes one token (presumably the '[')
        self.compile_expression()
        self.eat()  # consumes one token (presumably the ']')
        self.__writer.write_push(kind, index)
        self.__writer.write_arithmetic("add")
        self.eat()  # consumes one token (presumably the '=')
        self.compile_expression()
        # Park the right-hand value in temp 0 while the computed sum is
        # moved into pointer 1, then bring the value back and store it.
        self.__writer.write_pop("temp", 0)
        self.__writer.write_pop("pointer", 1)
        self.__writer.write_push("temp", 0)
        self.__writer.write_pop("that", 0)
Example #12
0
from SymbolTable import SymbolTable

if __name__ == "__main__":
    # Manual smoke test: build a symbol table and add six sample names.
    _symbolTable = SymbolTable()

    _symbolTable.add("N1")
    _symbolTable.add("N2")
    _symbolTable.add("AB")
    _symbolTable.add("BC")
    _symbolTable.add("N3")
    _symbolTable.add("A3")
    # The string literal below is never executed as code; it holds disabled
    # search checks for "N1", "N4" and "15".
    '''
    result = _symbolTable.search("N1")
    if result is None:
        print("None")
    else:
        print(result)

    result = _symbolTable.search("N4")
    if result is None:
        print("None")
    else:
        print(result)

    result = _symbolTable.search("15")
    if result is None:
        print("None")
    else:
        print(result)
        '''