class Scanner:
    def __init__(self, problem_file, tokens, separators):
        self.__problem_file = problem_file
        self.__tokens = tokens
        self.__separators = separators
        self.__pif = HashTable()
        self.__st = SymbolTable()

    def scan(self):
        line_count = 0
        for line in self.__problem_file.read().splitlines():
            line_count += 1
            for token in self.tokenize(line):
                if token in self.__tokens:
                    self.__pif.add(token, (-1, 1))
                elif isinstance(token, Identifier) or isinstance(token, Constant):
                    token_id = self.__st.add(token)
                    self.__pif.add(token, token_id)
                else:
                    print('Lexical error for token:', token, ', at line:', str(line_count))
                    return

    def tokenize(self, line):
        return line.split(str(self.__separators))

    def get_pif(self):
        return self.__pif

    def get_st(self):
        return self.__st
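# A minimal usage sketch for the Scanner above (the file name and token list are
# illustrative only; HashTable, SymbolTable, Identifier and Constant are assumed
# to be provided by the rest of this project).
def run_scanner_example():
    with open('problem.txt') as problem_file:
        scanner = Scanner(problem_file, ['if', 'while', '+', '-', '='], ' ')
        scanner.scan()
        print(scanner.get_pif())
        print(scanner.get_st())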
def main():
    symbol_table = SymbolTable()
    v1 = Identifier('a', 5)
    v2 = Identifier('b', 7)
    v3 = Identifier('c', 12)
    v4 = Identifier('a', 14)
    c1 = Constant('d', 7)
    symbol_table.add(v1)
    symbol_table.add(v2)
    symbol_table.add(v3)
    symbol_table.add(c1)
    symbol_table.add(v4)
    v2.set_value(42)
    symbol_table.add(v2)
    c2 = Constant('c', 17)
    symbol_table.add(c2)
    symbol_table.print_symbol_table()
def syntatic(stack, input):  # Top-down (predictive) parsing algorithm, no backtracking
    table = SymbolTable()
    X = stack.top()
    a = input.top()[1]
    lex = input.top()[0]
    callfuncao = False
    declarado = False
    function = False
    while X != "$":
        print("X = {}\ta = {}\t lexema = {}".format(X, a, lex))
        # X is the empty symbol î (token code 17)
        if X == 17:
            # print('1')
            stack.pop()
            X = stack.top()
            # Log the step to the output file
            file.write("--> X == 17 (Retirou elemento vazio da pilha)")
            print_stacks(stack, input)
        # X is a terminal
        elif X <= 50:
            # print('2')
            if X == a:
                if a == 41 and declarado:
                    declarado = False
                # Function call marker
                if a == 27:
                    name = input.stack[-2][0]
                    callfuncao = True
                    if not table.search(name):
                        print("\nErro na linha {}: Função não declarada".format(
                            int(input.top()[2]) + 1))
                        exit()
                # Function call
                if a == 7 and callfuncao:
                    function = True
                    params = []
                    if input.stack[-2][0] == '(':
                        i = -3
                        p = input.stack[i][0]
                        while p != ')':
                            if p != ',':
                                try:
                                    p = table.table[p]['type']
                                except:
                                    if input.stack[i][1] == 5:
                                        p = 'integer'
                                    if input.stack[i][1] == 6:
                                        p = 'float'
                                    if input.stack[i][1] == 8:
                                        p = 'char'
                                params.append(p)
                            i -= 1
                            p = input.stack[i][0]
                        if not params:
                            params = None
                        if params != table.table[name]['params']:
                            print("\nErro na linha {}: Parametros incorretos".format(
                                int(input.top()[2]) + 1))
                            exit()
                    callfuncao = False
                    stack.pop()
                    input.pop()
                    X = stack.top()
                    a = input.top()[1]
                    lex = input.top()[0]
                    continue
                # Assignment
                if a == 7 and input.stack[-2][0] == '=':
                    if not table.search(input.top()[0]):
                        print("\nErro na linha {}: Variável não declarada.".format(
                            int(input.top()[2]) + 1))
                        exit()
                    type = input.stack[-3][1]
                    if type == 5:
                        if not (table.table[input.stack[-1][0]]['type'] == 'integer'):
                            print("\nErro na linha {}: Atribuição incorreta".format(
                                int(input.top()[2]) + 1))
                            exit()
                    elif type == 6:
                        if not (table.table[input.stack[-1][0]]['type'] == 'float'):
                            print("\nErro na linha {}: Atribuição incorreta".format(
                                int(input.top()[2]) + 1))
                            exit()
                    elif type == 8:
                        if not (table.table[input.stack[-1][0]]['type'] == 'char'):
                            print("\nErro na linha {}: Atribuição incorreta".format(
                                int(input.top()[2]) + 1))
                            exit()
                # Variable
                if a == 7 and not declarado and not function:
                    if not table.search(input.top()[0]):
                        # Function name without parameters
                        if input.stack[-2][0] == '{':
                            # Add (name, cat, type, level, params=None)
                            table.add(input.top()[0], 'f', 0, 0)
                        # Function name with parameters
                        elif input.stack[-2][0] == '(':
                            params = []
                            i = -3
                            p = input.stack[i][0]
                            while p != ')':
                                if p != ';':
                                    params.append(p)
                                i -= 1
                                p = input.stack[i][0]
                            # Add (name, cat, type, level, params=None)
                            table.add(input.top()[0], 'f', 0, 0, params=params)
                        # Declaration of a single variable
                        elif input.stack[-2][0] == ':':
                            type = input.stack[-3][0]
                            table.add(input.top()[0], 'v', type, 0)
                        # Declaration of more than one variable
                        elif input.stack[-2][0] == ',':
                            declarado = True
                            i = -3
                            variaveis = []
                            variaveis.append(input.top()[0])
                            p = input.stack[i][0]
                            while p != ':':
                                if p == ',':
                                    i -= 1
                                else:
                                    variaveis.append(p)
                                    i -= 1
                                p = input.stack[i][0]
                            type = input.stack[i - 1][0]
                            for v in variaveis:
                                # Add (name, cat, type, level, params=None)
                                table.add(v, 'v', type, 0)
                        else:
                            print("\nErro na linha {}: Variável não declarada".format(
                                int(input.top()[2]) + 1))
                            exit()
                    else:
                        # Not a variable declaration
                        if input.stack[-2][0] != ',' or input.stack[-2][0] == ':':
                            stack.pop()
                            input.pop()
                            X = stack.top()
                            a = input.top()[1]
                            lex = input.top()[0]
                            continue
                        # Error
                        print("\nErro na linha {}: Redeclaração de variáveis".format(
                            int(input.top()[2]) + 1))
                        table.print()
                        exit()
                stack.pop()
                input.pop()
                # Log the step to the output file
                file.write("--> X == a (Retirou os elementos da pilha e da entrada)")
                print_stacks(stack, input)
                if input.top() == '$' and stack.top() == '$':
                    break
                X = stack.top()
                a = input.top()[1]
                lex = input.top()[0]
                continue
            else:
                # print('4')
                print("\nErro na linha {}".format(int(input.top()[2]) + 1))
                exit()
        # X is a non-terminal
        elif X > 50:
            # print('5')
            try:
                # print('6')
                r = parser[X][a]
                s = rules[r]
                stack.pop()
                for i in reversed(list(s)):
                    stack.push(i)
                X = stack.top()
                # Log the step to the output file
                file.write("--> X > 50 (Incluiu regra {} na pilha)".format(r))
                print_stacks(stack, input)
                continue
            except:
                # print('7')
                # print("\nStack: {}".format(stack))
                # print("input: {}".format(input))
                print("\nErro na linha {}".format(int(input.top()[2]) + 1))
                exit()
class AnalisadorLexico:
    def __init__(self, fileName):
        self.lexemes = {
            '>': 'MAIOR', '<': 'MENOR', '>=': 'MAIOR_IGUAL', '<=': 'MENOR_IGUAL',
            '!=': 'DIFERENTE', '==': 'IGUAL', '=': 'ATRIBUICAO', '+': 'SOMA',
            '-': 'SUBTRACAO', '*': 'MULTIPLICACAO', '/': 'DIVISAO', ',': 'VIRGULA',
            '.': 'PONTO', ';': 'PONTO_VIRGULA', '(': 'ABRE_PARENTESE',
            ')': 'FECHA_PARENTESE', '{': 'ABRE_CHAVE', '}': 'FECHA_CHAVE',
            '[': 'ABRE_COLCHETE', ']': 'FECHA_COLCHETE'
        }
        self.symbolTable = SymbolTable()
        self.tokens = []
        self.errors = []
        self.indice = 0
        self.reserved = [
            "int", "char", "float", "struct", "if", "else", "while", "void", "return"
        ]
        self.operators = [
            "=", "<=", "<", ">", ">=", "==", "!=", "+", "-", "*", "/"
        ]
        self.separators = [",", ".", "[", "{", "(", ")", "}", "]", ";"]
        self.skip = ['\t', ' ', '\n', '\r']
        self.letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
        self.digits = "0123456789"
        self.reader = BufferedFileReader(fileName)
        self.root = Noh(None, True, 'programa')
        self.arvore = ArvoreSintatica(self.root)

    def analisar(self):
        try:
            current_character = self.reader.getChar()
            self.reader.goForward()
            next_character = self.reader.getChar()
            lexeme = ""
            # Comments
            if current_character == '/' and next_character == '*':
                self.reader.goForward()
                current_character = self.reader.getChar()
                self.reader.goForward()
                next_character = self.reader.getChar()
                while not (next_character == '/' and current_character == '*'):
                    current_character = self.reader.getChar()
                    self.reader.goForward()
                    next_character = self.reader.getChar()
                self.reader.goForward()
            # Identifiers and reserved words
            elif (current_character in self.letters):
                location = [self.reader.getLine(), self.reader.getColumn()]
                while (current_character in self.letters) or (current_character in self.digits):
                    lexeme += current_character
                    current_character = self.reader.getChar()
                    self.reader.goForward()
                self.reader.goBackwards()
                if lexeme in self.reserved:
                    self.tokens.append(Token("RESERVADO", lexeme, location, None))
                else:
                    self.tokens.append(Token("ID", None, location, self.symbolTable.add(lexeme)))
            # Operators
            elif current_character in self.operators or current_character == '!':
                joint = ''
                joint += current_character
                current_character = self.reader.getChar()
                self.reader.goForward()
                if current_character in self.operators:
                    joint += current_character
                    if joint in self.operators:
                        self.tokens.append(
                            Token(self.lexemes[joint], joint,
                                  [self.reader.getLine(), self.reader.getColumn()], None))
                    else:
                        # Panic mode
                        self.tokens.append(
                            Token(self.lexemes[joint[0]], joint[0],
                                  [self.reader.getLine(), self.reader.getColumn()], None))
                        self.tokens.append(
                            Token(self.lexemes[joint[1]], joint[1],
                                  [self.reader.getLine(), self.reader.getColumn()], None))
                        self.errors.append(
                            "Warning -> Dois operandos diferentes juntos que nao agem em conjunto. Linha: "
                            + str(self.reader.getLine()) + " Coluna: " + str(self.reader.getColumn()))
                elif joint in self.operators:
                    self.tokens.append(
                        Token(self.lexemes[joint], joint,
                              [self.reader.getLine(), self.reader.getColumn()], None))
                    self.reader.goBackwards()
                else:
                    # Panic mode
                    self.errors.append(
                        "Warning -> exclamacao ignorada. Linha: " + str(self.reader.getLine())
                        + " Coluna: " + str(self.reader.getColumn() - 1))
            # Numeric constants
            elif current_character in self.digits:
                floating = False
                exponencial = False
                notIncluded = True
                number = ''
                while current_character in self.digits or current_character == '.' or current_character == 'E':
                    if current_character in self.digits:
                        number += current_character
                    elif current_character == '.' and ((not floating) and (not exponencial)):
                        floating = True
                        number += current_character
                    elif current_character == '.' and (floating or exponencial):
                        self.errors.append(
                            "Error -> Constante numerica invalida. Linha: " + str(self.reader.getLine())
                            + " Coluna: " + str(self.reader.getColumn()))
                    elif current_character == 'E' and (not exponencial):
                        if number[len(number) - 1] == '.':
                            # Panic mode
                            number += '0' + current_character
                            exponencial = True
                            self.errors.append(
                                "Warning -> Constante numerica convergida. Linha: "
                                + str(self.reader.getLine()) + " Coluna: " + str(self.reader.getColumn()))
                        else:
                            number += current_character
                            exponencial = True
                            current_character = self.reader.getChar()
                            self.reader.goForward()
                            if current_character == '+' or current_character == '-':
                                self.tokens.append(
                                    Token("CONSTANTE NUMERICA", None,
                                          [self.reader.getLine(), self.reader.getColumn()],
                                          self.symbolTable.add(number)))
                                self.tokens.append(
                                    Token(self.lexemes[current_character], current_character,
                                          [self.reader.getLine(), self.reader.getColumn()], None))
                                notIncluded = False
                                break
                            elif current_character in self.digits:
                                number += current_character
                    current_character = self.reader.getChar()
                    self.reader.goForward()
                if current_character in self.letters and current_character != 'E':
                    # Panic mode
                    self.reader.goBackwards()
                    self.errors.append(
                        "Warning -> Digitos ignorados no inicio de um identificador. Linha: "
                        + str(self.reader.getLine()) + " Coluna: " + str(self.reader.getColumn()))
                elif notIncluded:
                    self.reader.goBackwards()
                    self.tokens.append(
                        Token("CONSTANTE NUMERICA", None,
                              [self.reader.getLine(), self.reader.getColumn()],
                              self.symbolTable.add(number)))
            # Separators
            elif current_character in self.separators:
                self.tokens.append(
                    Token(self.lexemes[current_character], current_character,
                          [self.reader.getLine(), self.reader.getColumn()], None))
            elif current_character not in self.skip:
                self.errors.append(
                    "Error -> Caractere nao suportado. Linha: " + str(self.reader.getLine())
                    + " Coluna: " + str(self.reader.getColumn()))
            self.analisar()
        except EndOfBufferException as e:
            self.errors.append(str(e))
        except EndOfFileException as e:
            pass

    def imprimir_tokens(self):
        print("\nTokens (<tipo, valor/id, linha, coluna>):")
        for t in self.tokens:
            print(t)

    def peek_token(self):
        if (self.indice >= len(self.tokens)):
            return None
        return self.tokens[self.indice]

    def peek_second_token(self):
        if ((self.indice + 1) >= len(self.tokens)):
            return None
        return self.tokens[self.indice + 1]

    def pop_token(self):
        if (self.indice == len(self.tokens)):
            return None
        else:
            self.indice += 1
            return self.tokens[self.indice - 1]

    def analise_sintatica(self):
        self.programa()

    def programa(self):
        if (self.peek_token() is not None):
            noh1 = Noh(self.arvore.root, False, 'declaracao_lista')
            self.arvore.root.addFilho(noh1)
            self.declaracao_lista(noh1)

    def declaracao_lista(self, noh):
        if (self.peek_token() is not None):
            noh1 = Noh(noh, False, 'declaracao')
            noh.addFilho(noh1)
            self.declaracao(noh1)
            noh2 = Noh(noh, False, 'declaracao_lista\'')
            noh.addFilho(noh2)
            self.declaracao_lista1(noh2)

    def declaracao_lista1(self, noh):
        if (self.peek_token() is not None):
            primeiros_declaracao = ['int', 'float', 'char', 'void', 'struct']
            if (self.peek_token().lexeme in primeiros_declaracao):
                noh1 = Noh(noh, False, 'declaracao')
                noh.addFilho(noh1)
                self.declaracao(noh1)
                noh2 = Noh(noh, False, 'declaracao_lista\'')
                noh.addFilho(noh2)
                self.declaracao_lista1(noh2)

    def declaracao(self, noh):
        if (self.peek_token() is not None):
            primeiros_tipo_especificador = ['int', 'float', 'char', 'void']
            if (self.peek_token().lexeme in primeiros_tipo_especificador):
                noh1 = Noh(noh, False, 'tipo_especificador')
                noh.addFilho(noh1)
                self.tipo_especificador(noh1)
                self.match(noh, "ID")
                noh2 = Noh(noh, False, 'declaracao\'')
                noh.addFilho(noh2)
                self.declaracao1(noh2)
            else:
                noh1 = Noh(noh, False, 'estrutura')
                noh.addFilho(noh1)
                self.estrutura(noh1)

    def declaracao1(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexeme == '('):
                noh1 = Noh(noh, False, 'fun_declaracao')
                noh.addFilho(noh1)
                self.fun_declaracao(noh1)
            else:
                noh1 = Noh(noh, False, 'var_declaracao')
                noh.addFilho(noh1)
                self.var_declaracao(noh1)

    def var_declaracao(self, noh):
        if (self.peek_token() is not None):
            noh1 = Noh(noh, False, 'var_declaracao\'')
            noh.addFilho(noh1)
            self.var_declaracao1(noh1)

    def var_declaracao1(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexemeType == self.lexemes['[']):
                self.match(noh, '[')
                self.match(noh, 'CONSTANTE NUMERICA')
                self.match(noh, ']')
                noh1 = Noh(noh, False, 'var_declaracao\'')
                noh.addFilho(noh1)
                self.var_declaracao1(noh1)
            elif (self.peek_token().lexemeType == self.lexemes['=']):
                self.match(noh, '=')
                noh1 = Noh(noh, False, 'expressao')
                noh.addFilho(noh1)
                self.expressao(noh1)
                noh2 = Noh(noh, False, 'var_declaracao\'')
                noh.addFilho(noh2)
                self.var_declaracao1(noh2)
            else:
                self.match(noh, ';')

    def tipo_especificador(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexeme == "int"):
                self.match(noh, "int")
            elif (self.peek_token().lexeme == "float"):
                self.match(noh, "float")
            elif (self.peek_token().lexeme == "char"):
                self.match(noh, "char")
            else:
                self.match(noh, "void")

    def estrutura(self, noh):
        if (self.peek_token() is not None):
            self.match(noh, 'struct')
            self.match(noh, 'ID')
            self.match(noh, '{')
            noh1 = Noh(noh, False, 'atributos_declaracao')
            noh.addFilho(noh1)
            self.atributos_declaracao(noh1)
            self.match(noh, '}')

    def atributos_declaracao(self, noh):
        if (self.peek_token() is not None):
            primeiros_tipo_especificador = ['int', 'float', 'char', 'void']
            if (self.peek_token().lexeme in primeiros_tipo_especificador):
                noh1 = Noh(noh, False, 'tipo_especificador')
                noh.addFilho(noh1)
                self.tipo_especificador(noh1)
                self.match(noh, 'ID')
                noh2 = Noh(noh, False, 'var_declaracao')
                noh.addFilho(noh2)
                self.var_declaracao(noh2)
                noh3 = Noh(noh, False, 'atributos_declaracao\'')
                noh.addFilho(noh3)
                self.atributos_declaracao1(noh3)
            else:
                noh1 = Noh(noh, False, 'estrutura')
                noh.addFilho(noh1)
                self.estrutura(noh1)
                noh2 = Noh(noh, False, 'declaracao\'')
                noh.addFilho(noh2)
                self.atributos_declaracao1(noh2)

    def atributos_declaracao1(self, noh):
        if (self.peek_token() is not None):
            primeiros_tipo_especificador = ['int', 'float', 'char', 'void']
            if (self.peek_token().lexeme in primeiros_tipo_especificador):
                noh1 = Noh(noh, False, 'tipo_especificador')
                noh.addFilho(noh1)
                self.tipo_especificador(noh1)
                self.match(noh, 'ID')
                noh2 = Noh(noh, False, 'var_declaracao')
                noh.addFilho(noh2)
                self.var_declaracao(noh2)
                noh3 = Noh(noh, False, 'atributos_declaracao\'')
                noh.addFilho(noh3)
                self.atributos_declaracao1(noh3)
            elif (self.peek_token().lexeme == 'struct'):
                noh1 = Noh(noh, False, 'estrutura')
                noh.addFilho(noh1)
                self.estrutura(noh1)
                noh2 = Noh(noh, False, 'atributos_declaracao\'')
                noh.addFilho(noh2)
                self.atributos_declaracao1(noh2)

    def fun_declaracao(self, noh):
        if (self.peek_token() is not None):
            self.match(noh, '(')
            noh1 = Noh(noh, False, 'params')
            noh.addFilho(noh1)
            self.params(noh1)
            self.match(noh, ')')
            noh2 = Noh(noh, False, 'composto_decl')
            noh.addFilho(noh2)
            self.composto_decl(noh2)

    def params(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexeme == 'void'):
                self.match(noh, 'void')
            else:
                noh1 = Noh(noh, False, 'param_lista')
                noh.addFilho(noh1)
                self.param_lista(noh1)

    def param_lista(self, noh):
        if (self.peek_token() is not None):
            primeiros_tipo_especificador = ['int', 'float', 'char', 'void']
            if (self.peek_token().lexeme in primeiros_tipo_especificador):
                noh1 = Noh(noh, False, 'param')
                noh.addFilho(noh1)
                self.param(noh1)
                noh2 = Noh(noh, False, 'param_lista')
                noh.addFilho(noh2)
                self.param_lista(noh2)
            elif (self.peek_token().lexeme == ','):
                self.match(noh, ',')
                noh1 = Noh(noh, False, 'param')
                noh.addFilho(noh1)
                self.param(noh1)
                noh2 = Noh(noh, False, 'param_lista')
                noh.addFilho(noh2)
                self.param_lista(noh2)

    def param(self, noh):
        if (self.peek_token() is not None):
            noh2 = Noh(noh, False, 'tipo_especificador')
            noh.addFilho(noh2)
            self.tipo_especificador(noh2)
            self.match(noh, "ID")
            noh1 = Noh(noh, False, 'param\'')
            noh.addFilho(noh1)
            self.param1(noh1)

    def param1(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexemeType == self.lexemes['[']):
                self.match(noh, '[')
                self.match(noh, ']')
                noh1 = Noh(noh, False, 'param\'')
                noh.addFilho(noh1)
                self.param1(noh1)

    def composto_decl(self, noh):
        if (self.peek_token() is not None):
            self.match(noh, "{")
            noh1 = Noh(noh, False, 'instrucoes')
            noh.addFilho(noh1)
            self.instrucoes(noh1)
            self.match(noh, "}")

    def instrucoes(self, noh):
        if (self.peek_token() is not None):
            primeiros_comando = [
                '{', 'if', 'while', 'return', '(', 'ID', 'CONSTANTE NUMERICA'
            ]
            primeiros_local_declaracoes = ["int", "char", "float", "void"]
            if (self.peek_token().lexeme in primeiros_comando
                    or self.peek_token().lexemeType in primeiros_comando):
                noh2 = Noh(noh, False, 'comando_lista')
                noh.addFilho(noh2)
                self.comando_lista(noh2)
                noh1 = Noh(noh, False, 'instrucoes')
                noh.addFilho(noh1)
                self.instrucoes(noh1)
            elif (self.peek_token().lexeme in primeiros_local_declaracoes):
                noh2 = Noh(noh, False, 'local_declaracoes')
                noh.addFilho(noh2)
                self.local_declaracoes(noh2)
                noh1 = Noh(noh, False, 'instrucoes')
                noh.addFilho(noh1)
                self.instrucoes(noh1)

    def local_declaracoes(self, noh):
        if (self.peek_token() is not None):
            primeiros_tipo_especificador = ["int", "char", "float", "void"]
            if (self.peek_token().lexeme in primeiros_tipo_especificador):
                noh1 = Noh(noh, False, 'tipo_especificador')
                noh.addFilho(noh1)
                self.tipo_especificador(noh1)
                self.match(noh, 'ID')
                noh3 = Noh(noh, False, 'var_declaracao')
                noh.addFilho(noh3)
                self.var_declaracao(noh3)
                noh2 = Noh(noh, False, 'local_declaracoes')
                noh.addFilho(noh2)
                self.local_declaracoes(noh2)
            elif (self.peek_token().lexeme == 'struct'):
                noh1 = Noh(noh, False, 'estrutura')
                noh.addFilho(noh1)
                self.estrutura(noh1)
                noh2 = Noh(noh, False, 'local_declaracoes')
                noh.addFilho(noh2)
                self.local_declaracoes(noh2)

    def comando_lista(self, noh):
        if (self.peek_token() is not None):
            primeiros_comando = [
                '{', 'if', 'while', 'return', '(', 'ID', 'CONSTANTE NUMERICA'
            ]
            if (self.peek_token().lexeme in primeiros_comando
                    or self.peek_token().lexemeType in primeiros_comando):
                noh1 = Noh(noh, False, 'comando')
                noh.addFilho(noh1)
                self.comando(noh1)
                noh2 = Noh(noh, False, 'comando_lista')
                noh.addFilho(noh2)
                self.comando_lista(noh2)

    def comando(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexemeType == self.lexemes['{']):
                noh1 = Noh(noh, False, 'composto_decl')
                noh.addFilho(noh1)
                self.composto_decl(noh1)
            elif (self.peek_token().lexeme == "if"):
                noh1 = Noh(noh, False, 'selecao_decl')
                noh.addFilho(noh1)
                self.selecao_decl(noh1)
            elif (self.peek_token().lexeme == "while"):
                noh1 = Noh(noh, False, 'iteracao_decl')
                noh.addFilho(noh1)
                self.iteracao_decl(noh1)
            elif (self.peek_token().lexeme == "return"):
                noh1 = Noh(noh, False, 'retorno_decl')
                noh.addFilho(noh1)
                self.retorno_decl(noh1)
            else:
                noh1 = Noh(noh, False, 'expressao_decl')
                noh.addFilho(noh1)
                self.expressao_decl(noh1)

    def expressao_decl(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexeme == ';'):
                self.match(noh, ';')
            else:
                noh1 = Noh(noh, False, 'expressao')
                noh.addFilho(noh1)
                self.expressao(noh1)
                self.match(noh, ";")

    def selecao_decl(self, noh):
        if (self.peek_token() is not None):
            self.match(noh, "if")
            self.match(noh, "(")
            noh1 = Noh(noh, False, 'expressao')
            noh.addFilho(noh1)
            self.expressao(noh1)
            self.match(noh, ")")
            noh2 = Noh(noh, False, 'comando')
            noh.addFilho(noh2)
            self.comando(noh2)
            noh3 = Noh(noh, False, 'else_decl')
            noh.addFilho(noh3)
            self.else_decl(noh3)

    def else_decl(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexeme == "else"):
                self.match(noh, "else")
                noh2 = Noh(noh, False, 'comando')
                noh.addFilho(noh2)
                self.comando(noh2)

    def iteracao_decl(self, noh):
        if (self.peek_token() is not None):
            self.match(noh, "while")
            self.match(noh, "(")
            noh1 = Noh(noh, False, 'expressao')
            noh.addFilho(noh1)
            self.expressao(noh1)
            self.match(noh, ")")
            noh2 = Noh(noh, False, 'comando')
            noh.addFilho(noh2)
            self.comando(noh2)

    def retorno_decl(self, noh):
        if (self.peek_token() is not None):
            self.match(noh, "return")
            noh2 = Noh(noh, False, 'retorno_decl\'')
            noh.addFilho(noh2)
            self.retorno_decl1(noh2)
            self.match(noh, ";")

    def retorno_decl1(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexemeType != self.lexemes[';']):
                noh2 = Noh(noh, False, 'expressao')
                noh.addFilho(noh2)
                self.expressao(noh2)

    def expressao(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexemeType == self.lexemes['(']
                    or self.peek_token().lexemeType == 'CONSTANTE NUMERICA'):
                noh2 = Noh(noh, False, 'expressao_simples')
                noh.addFilho(noh2)
                self.expressao_simples(noh2)
            else:
                if (self.peek_second_token().lexemeType == self.lexemes['(']):
                    noh2 = Noh(noh, False, 'ativacao')
                    noh.addFilho(noh2)
                    self.ativacao(noh2)
                    noh1 = Noh(noh, False, 'termo\'')
                    noh.addFilho(noh1)
                    self.termo1(noh1)
                    noh3 = Noh(noh, False, 'expressao_soma\'')
                    noh.addFilho(noh3)
                    self.expressao_soma1(noh3)
                    noh4 = Noh(noh, False, 'expressao_simples\'')
                    noh.addFilho(noh4)
                    self.expressao_simples1(noh4)
                else:
                    noh1 = Noh(noh, False, 'var')
                    noh.addFilho(noh1)
                    self.var(noh1)
                    if (self.peek_token().lexemeType == self.lexemes['=']):
                        self.match(noh, '=')
                        noh2 = Noh(noh, False, 'expressao')
                        noh.addFilho(noh2)
                        self.expressao(noh2)
                    else:
                        noh2 = Noh(noh, False, 'termo\'')
                        noh.addFilho(noh2)
                        self.termo1(noh2)
                        noh3 = Noh(noh, False, 'expressao_soma\'')
                        noh.addFilho(noh3)
                        self.expressao_soma1(noh3)
                        noh4 = Noh(noh, False, 'expressao_simples\'')
                        noh.addFilho(noh4)
                        self.expressao_simples1(noh4)

    def var(self, noh):
        if (self.peek_token() is not None):
            self.match(noh, 'ID')
            noh1 = Noh(noh, False, 'var\'')
            noh.addFilho(noh1)
            self.var1(noh1)

    def var1(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexemeType == self.lexemes['[']):
                self.match(noh, '[')
                noh2 = Noh(noh, False, 'expressao')
                noh.addFilho(noh2)
                self.expressao(noh2)
                self.match(noh, ']')
                noh1 = Noh(noh, False, 'var\'')
                noh.addFilho(noh1)
                self.var1(noh1)

    def expressao_simples(self, noh):
        if (self.peek_token() is not None):
            noh1 = Noh(noh, False, 'expressao_soma')
            noh.addFilho(noh1)
            self.expressao_soma(noh1)
            noh2 = Noh(noh, False, 'expressao_simples\'')
            noh.addFilho(noh2)
            self.expressao_simples1(noh2)

    def expressao_simples1(self, noh):
        if (self.peek_token() is not None):
            primeiros_relacional = ['<', '>', '==', '!=', '<=', '>=']
            if (self.peek_token().lexeme in primeiros_relacional):
                noh1 = Noh(noh, False, 'relacional')
                noh.addFilho(noh1)
                self.relacional(noh1)
                noh2 = Noh(noh, False, 'expressao_soma')
                noh.addFilho(noh2)
                self.expressao_soma(noh2)

    def expressao_soma(self, noh):
        if (self.peek_token() is not None):
            noh1 = Noh(noh, False, 'termo')
            noh.addFilho(noh1)
            self.termo(noh1)
            noh2 = Noh(noh, False, 'expressao_soma\'')
            noh.addFilho(noh2)
            self.expressao_soma1(noh2)

    def expressao_soma1(self, noh):
        if (self.peek_token() is not None):
            primeiros_soma = ['+', '-']
            if (self.peek_token().lexeme in primeiros_soma):
                noh1 = Noh(noh, False, 'soma')
                noh.addFilho(noh1)
                self.soma(noh1)
                noh2 = Noh(noh, False, 'termo')
                noh.addFilho(noh2)
                self.termo(noh2)
                noh3 = Noh(noh, False, 'expressao_soma\'')
                noh.addFilho(noh3)
                self.expressao_soma1(noh3)

    def termo(self, noh):
        if (self.peek_token() is not None):
            noh1 = Noh(noh, False, 'fator')
            noh.addFilho(noh1)
            self.fator(noh1)
            noh2 = Noh(noh, False, 'termo\'')
            noh.addFilho(noh2)
            self.termo1(noh2)

    def termo1(self, noh):
        if (self.peek_token() is not None):
            primeiros_mult = ['*', '/']
            if (self.peek_token().lexeme in primeiros_mult):
                noh1 = Noh(noh, False, 'mult')
                noh.addFilho(noh1)
                self.mult(noh1)
                noh2 = Noh(noh, False, 'fator')
                noh.addFilho(noh2)
                self.fator(noh2)
                noh3 = Noh(noh, False, 'termo\'')
                noh.addFilho(noh3)
                self.termo1(noh3)

    def fator(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexemeType == self.lexemes['(']):
                self.match(noh, '(')
                noh1 = Noh(noh, False, 'expressao')
                noh.addFilho(noh1)
                self.expressao(noh1)
                self.match(noh, ')')
            elif (self.peek_token().lexemeType == 'CONSTANTE NUMERICA'):
                self.match(noh, 'CONSTANTE NUMERICA')
            elif (self.peek_second_token().lexemeType == self.lexemes['(']):
                noh1 = Noh(noh, False, 'ativacao')
                noh.addFilho(noh1)
                self.ativacao(noh1)
            else:
                noh1 = Noh(noh, False, 'var')
                noh.addFilho(noh1)
                self.var(noh1)

    def ativacao(self, noh):
        if (self.peek_token() is not None):
            self.match(noh, 'ID')
            self.match(noh, '(')
            noh1 = Noh(noh, False, 'args')
            noh.addFilho(noh1)
            self.args(noh1)
            self.match(noh, ')')

    def args(self, noh):
        if (self.peek_token() is not None):
            noh1 = Noh(noh, False, 'arg_lista')
            noh.addFilho(noh1)
            self.arg_lista(noh1)

    def arg_lista(self, noh):
        if (self.peek_token() is not None):
            primeiros_expressao = [
                'ID', 'CONSTANTE NUMERICA', self.lexemes['(']
            ]
            if (self.peek_token().lexemeType in primeiros_expressao):
                noh1 = Noh(noh, False, 'expressao')
                noh.addFilho(noh1)
                self.expressao(noh1)
                noh2 = Noh(noh, False, 'arg_lista')
                noh.addFilho(noh2)
                self.arg_lista(noh2)
            elif (self.peek_token().lexeme == ','):
                self.match(noh, ',')
                noh1 = Noh(noh, False, 'expressao')
                noh.addFilho(noh1)
                self.expressao(noh1)
                noh2 = Noh(noh, False, 'arg_lista')
                noh.addFilho(noh2)
                self.arg_lista(noh2)

    def relacional(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexemeType == self.lexemes['>']):
                self.match(noh, '>')
            elif (self.peek_token().lexemeType == self.lexemes['<']):
                self.match(noh, '<')
            elif (self.peek_token().lexemeType == self.lexemes['>=']):
                self.match(noh, '>=')
            elif (self.peek_token().lexemeType == self.lexemes['<=']):
                self.match(noh, '<=')
            elif (self.peek_token().lexemeType == self.lexemes['!=']):
                self.match(noh, '!=')
            else:
                self.match(noh, '==')

    def soma(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexemeType == self.lexemes['+']):
                self.match(noh, '+')
            else:
                self.match(noh, '-')

    def mult(self, noh):
        if (self.peek_token() is not None):
            if (self.peek_token().lexemeType == self.lexemes['*']):
                self.match(noh, '*')
            else:
                self.match(noh, '/')

    def match(self, noh, lexeme):
        token = self.peek_token()
        if (token is not None):
            if (lexeme == 'ID' or lexeme == 'CONSTANTE NUMERICA'):
                if (token.lexemeType != lexeme):
                    self.errors.append('Syntax Error -> Esperado \'' + lexeme +
                                       '\' - encontrado \'' + str(token) + '\'')
                else:
                    noh1 = Noh(noh, False, token)
                    noh.addFilho(noh1)
            else:
                if (token.lexeme != lexeme):
                    # panic mode
                    if (lexeme == ';'):
                        self.errors.append('Syntax Error -> Esperado \'' + lexeme +
                                           '\' - encontrado \'' + str(token) +
                                           '\'\nERRO FOI IGNORADO')
                        noh1 = Noh(noh, False, ';')
                        noh.addFilho(noh1)
                        return
                    elif (lexeme == 'struct'):
                        self.errors.append('Syntax Error -> Esperado \'' + lexeme +
                                           '\' - encontrado \'' + str(token) + '\'\n')
                        return
                    self.errors.append('Syntax Error -> Esperado \'' + lexeme +
                                       '\' - encontrado \'' + str(token) + '\'')
                else:
                    noh1 = Noh(noh, False, token)
                    noh.addFilho(noh1)
        else:
            self.errors.append('Syntax Error -> Esperado \'' + lexeme +
                               '\' - encontrado \'nada\'')
            return
        self.pop_token()

    # ------------------------------------------------------------------------

    def imprimir_tabela_simbolos(self):
        print("\nTabela de Simbolos:")
        print(self.symbolTable)

    def imprimir_erros(self):
        if not self.errors:
            return
        print("Errors:")
        for e in self.errors:
            print(e)

    def imprimirArvore(self):
        print("Arvore Sintatica:\n")
        print(self.arvore)
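# Hedged driver sketch for AnalisadorLexico: run the lexer (it recurses until the
# reader signals end of file), then the recursive-descent parser, and print the
# results. The input file name is illustrative only; Token, Noh, ArvoreSintatica,
# SymbolTable and BufferedFileReader come from the rest of this project.
def run_analisador_example():
    analisador = AnalisadorLexico('programa.c')
    analisador.analisar()
    analisador.imprimir_tokens()
    analisador.imprimir_tabela_simbolos()
    analisador.analise_sintatica()
    analisador.imprimirArvore()
    analisador.imprimir_erros()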
class Interpreter:
    def __init__(self, code_string=None):
        self._KEYWORDS = ['read', 'write']
        self._token = None
        self._line = 0
        self._tokenizer = Tokenizer(code_string,
                                    ['+', '-', '/', '*', '(', ')', ':='],
                                    ['\n', ' '])
        self._symboltable = SymbolTable()

    def reset(self):
        self._line = 0
        self._token = None
        self._tokenizer.clear()

    def interpret(self, code_string=None):
        if code_string is not None:
            self._tokenizer.append(code_string)
        self._consume()
        self.program()

    def _consume(self, _nomable=None):
        if _nomable == '$$':
            self.reset()
            return True
        if _nomable == 'id':
            self._symboltable.add(self._token, self._line)
        # TODO: add current token to AST
        self._token = self._tokenizer.next()

    def _is_token_id(self, _id=None):
        if self._token is None:
            raise ParseError(self._line, 'unexpected EOF')
        if _id is None:
            _id = self._token
        if self._symboltable.has(_id):
            return True
        elif _id.isalpha() and _id not in self._KEYWORDS:
            return True
        else:
            return False

    def _is_token_num(self, _num=None):
        if self._token is None:
            raise ParseError(self._line, 'unexpected EOF')
        if _num is None:
            _num = self._token
        if _num.isdigit():
            return True
        else:
            return False

    def _is_token_id_or_num(self, _token=None):
        if _token is None:
            _token = self._token
        if self._is_token_id(_token) or self._is_token_num(_token):
            return True
        else:
            return False

    def _match(self, expected):
        # TODO: might conflict with id's named 'id' or 'number'
        if expected == self._token or expected in ['id', 'number']:
            self._consume(self._token)
        else:
            raise TokenError(self._line, self._token, expected)

    def _skip(self):
        pass

    def program(self):
        if self._token in ['read', 'write', '$$'] or self._is_token_id():
            self._stmt_list()
            self._match('$$')
        else:
            raise ParseError(self._line, 'program')

    def _stmt_list(self):
        if self._token == '$$':
            self._skip()
        elif self._token in ['read', 'write'] or self._is_token_id():
            self._line += 1
            self._stmt()
            self._stmt_list()
        else:
            raise ParseError(self._line, 'stmt_list')

    def _stmt(self):
        if self._token == 'read':
            self._match('read')
            self._match('id')
        elif self._token == 'write':
            self._match('write')
            self._expr()
        elif self._is_token_id():
            self._match('id')
            self._match(':=')
            self._expr()
        else:
            raise ParseError(self._line, 'stmt')

    def _expr(self):
        if self._token == '(' or self._is_token_id_or_num():
            self._term()
            self._term_tail()
        else:
            raise ParseError(self._line, 'expr')

    def _term_tail(self):
        if self._token in ['+', '-']:
            self._add_op()
            self._term()
            self._term_tail()
        elif self._token in [')', 'read', 'write', '$$'] or self._is_token_id():
            self._skip()
        else:
            raise ParseError(self._line, 'term_tail')

    def _term(self):
        if self._token == '(' or self._is_token_id_or_num():
            self._factor()
            self._factor_tail()
        else:
            raise ParseError(self._line, 'term')

    def _factor_tail(self):
        if self._token in ['*', '/']:
            self._mult_op()
            self._factor()
            self._factor_tail()
        elif self._token in ['+', '-', ')', 'read', 'write', '$$'] or self._is_token_id():
            self._skip()
        else:
            raise ParseError(self._line, 'factor_tail')

    def _factor(self):
        if self._token == '(':
            self._match('(')
            self._expr()
            self._match(')')
        elif self._is_token_id():
            self._match('id')
        elif self._is_token_num():
            self._match('number')
        else:
            raise ParseError(self._line, 'factor')

    def _add_op(self):
        if self._token == '+':
            self._match('+')
        elif self._token == '-':
            self._match('-')
        else:
            raise ParseError(self._line, 'add_op')

    def _mult_op(self):
        if self._token == '*':
            self._match('*')
        elif self._token == '/':
            self._match('/')
        else:
            raise ParseError(self._line, 'mult_op')
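# Minimal sketch of driving the Interpreter on a small calculator-language
# program (the source text is illustrative; Tokenizer, SymbolTable, ParseError
# and TokenError are assumed to be provided by the rest of this project).
def run_interpreter_example():
    source = 'read a write a + 2 $$'
    interpreter = Interpreter(source)
    try:
        interpreter.interpret()
        print('parsed without errors')
    except (ParseError, TokenError) as error:
        print(error)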
def run_ui(self):
    flag = True
    while flag:
        self.print_menu()
        cmd = int(input("Your command: > "))
        if cmd == 0:
            flag = False
        if cmd == 1:
            inputSequenceStr = input("Give your input sequence (without spaces): ")
            input_sequence_list = []
            for char in inputSequenceStr:
                input_sequence_list.append(char)
            g = Grammar.fromFile("grammar.txt")
            lr0 = LZero(g)
            print(lr0.parse(input_sequence_list))
            print(lr0.get_derivation_string(input_sequence_list))
        if cmd == 2:
            scanner = Scanner()
            identifierTable = SymbolTable()
            constantsTable = SymbolTable()
            pif = ProgramInternalForm()
            fileName = 'program.txt'
            with open(fileName, 'r') as file:
                lineNo = 0
                for line in file:
                    lineNo += 1
                    for token in scanner.tokenGenerator(line.strip('\n'), separators):
                        if token == ' ':
                            continue
                        if token in separators + operators + reservedWords:
                            pif.add(codification[token], -1)
                        elif scanner.isIdentifier(token):
                            id = identifierTable.add(token)
                            pif.add(codification['identifier'], id)
                        elif scanner.isConstant(token):
                            id = constantsTable.add(token)
                            pif.add(codification['constant'], id)
                        else:
                            raise Exception('Unknown token ' + token + ' at line ' + str(lineNo))
            print('Program Internal Form: \n', pif)
            print('Identifier Table: \n', identifierTable)
            print('Constants Table: \n', constantsTable)

            # Turning the PIF into an input stack
            inverseCodification = dict([[codification[key], key] for key in codification])
            for code in pif.getCodes():
                print(code, ' : ', inverseCodification[code])
            inputStack = [str(code) for code in pif.getCodes()]
            print(inputStack)

            g = Grammar.fromFile("my_grammar.txt")
            g.P = [('S1', ['.', g.S])] + g.P
            g.N += ['S1']
            lr0 = LZero(g)
            print(g)
            print(lr0.parse(inputStack))
            # Sequence of derivations, as presented in course 6
            print(lr0.get_derivation_string_program(inputStack))
class ProgramBuilder():
    """ manages the in memory IR for LLVM """

    def __init__(self, input_fName, output_fName):
        self.symbolTable = SymbolTable()
        self.parser = Parser(input_fName)
        self.output_fName = output_fName
        self._has_errors = False
        # llvm setup
        binding.initialize()
        binding.initialize_native_target()
        binding.initialize_native_asmprinter()

    def has_errors(self):
        """ checks the flags for scanner, parser or codegen errors """
        return self._has_errors or self.parser._has_errors or self.parser.scanner._has_errors

    def initialize_module(self, name):
        # create module
        self.module = ir.Module(name=name)
        self.module.triple = binding.get_default_triple()
        # load builtin I/O functions
        self.load_builtins()
        # create main function
        func_type = ir.FunctionType(ir.VoidType(), [], False)
        base_func = ir.Function(self.module, func_type, name="main")
        block = base_func.append_basic_block(name="entry")
        # create llvm ir builder and set at start of the main block
        self.builder = ir.IRBuilder(block)

    def load_builtins(self):
        """ create builtin get and put functions (empty for now) """
        # name, return type, params
        builtins = [
            ("putbool", ir.VoidType(), [ir.IntType(1)]),
            ("putstring", ir.VoidType(), [ir.PointerType(ir.IntType(8))]),
            ("putinteger", ir.VoidType(), [ir.IntType(32)]),
            ("putfloat", ir.VoidType(), [ir.FloatType()]),
            ("getbool", ir.IntType(1), []),
            ("getinteger", ir.IntType(32), []),
            ("getfloat", ir.FloatType(), []),
            ("main.StringEquals", ir.IntType(1),
             [ir.PointerType(ir.IntType(8)), ir.PointerType(ir.IntType(8))]),
            ("getstring", ir.PointerType(ir.IntType(8)), [])
        ]
        for entry in builtins:
            fType = ir.FunctionType(entry[1], entry[2])
            func = ir.Function(self.module, fType, name=entry[0])
            symbol = Symbol(entry[0], func, fType, id_type='function')
            self.symbolTable.add(symbol)

    def output_ir(self):
        # signal return from main function
        # self.builder.ret_void()
        output = str(self.module)
        with open(self.output_fName, "w") as fd:
            fd.write(output)

    def generate_module_code(self):
        """ runs the parse loop and calls codegen functions for the input program """
        # parse top level module info
        moduleName = self.parser.parse_program_header()
        if not moduleName:
            print("could not parse module header")
            return None
        self.initialize_module(moduleName)
        print("generating module: {}".format(moduleName))
        # parse and generate top level declarations (variable or function)
        res = self.parser.parse_top_level_declaration()
        while res != tkn.BEGIN:
            if res is not None:  # and (not self.has_errors()):
                ir = res.codegen(self.builder, self.symbolTable, self.module)
                if ir is None:
                    self._has_errors = True
                    print("\n".join(res.getErrors()))
            res = self.parser.parse_top_level_declaration()
        # parse and generate top level statements
        res = self.parser.parse_top_level_statement()
        while res != tkn.EOF:
            if res is not None:  # and (not self.has_errors()):
                ir = res.codegen(self.builder, self.symbolTable, self.module)
                if ir is None:
                    self._has_errors = True
                    print("\n".join(res.getErrors()))
            res = self.parser.parse_top_level_statement()
        if not self.has_errors():
            print("writing IR to LLVM assembly\n")
            self.output_ir()
        else:
            print("errors detected, compilation aborted")
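# Hedged usage sketch: build module-level IR for an input source file and write
# it to an LLVM assembly file (both file names are placeholders; Parser,
# SymbolTable, Symbol, tkn and the llvmlite ir/binding modules are assumed to be
# imported by the real module).
def run_program_builder_example():
    builder = ProgramBuilder('program.src', 'program.ll')
    builder.generate_module_code()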
class HackAssembler:
    def __init__(self, asm_filename):
        self.asm_filename = asm_filename
        self.symbol_table = SymbolTable()
        self.parser = Parser(self.symbol_table)
        self.binary_translator = BinaryTraslator(self.parser)
        self.next_open_memory_address = 16
        self.labels_parsed = 0
        self.output_string = ''

    def compile(self):
        # self.__remove_commented_and_empty_lines()
        self.__scan_for_labels()
        self.__scan_for_variables()
        self.__translate_to_binary()
        self.__write_to_out_file()

    # def __remove_commented_and_empty_lines(self):
    #     self.__read_file_by_line(self.__remove_commented_or_empty_line)

    # def __remove_commented_or_empty_line(self, line, cnt):
    #     line = line.split('//')[0].strip().replace(' ', '')
    #     if len(line) > 0:
    #         self.output_string += "{}\n".format(line)

    def __scan_for_labels(self):
        self.__read_file_by_line(self.__add_value_to_symbol_table_if_label)

    def __scan_for_variables(self):
        self.__read_file_by_line(self.__add_value_to_symbol_table_if_variable)

    def __translate_to_binary(self):
        self.__read_file_by_line(self.__translate_line_to_binary)

    def __write_to_out_file(self):
        base_filename = self.asm_filename.split('.asm')[0]
        hack_filename = "{}_Brayden.hack".format(base_filename)
        with open(hack_filename, "w") as text_file:
            text_file.write(self.output_string)

    def __read_file_by_line(self, block):
        lines_processed = 0
        with open(self.asm_filename) as fp:
            for line in fp:
                line = self.__preprocess_line(line)
                if len(line) > 0:
                    block(line, lines_processed)
                    lines_processed += 1

    def __preprocess_line(self, line):
        return line.split('//')[0].strip().replace(' ', '')

    def __add_value_to_symbol_table_if_label(self, line, cnt):
        if self.parser.is_label(line):
            self.symbol_table.add(self.parser.label_value(line),
                                  cnt - self.labels_parsed)
            self.labels_parsed += 1

    def __add_value_to_symbol_table_if_variable(self, line, cnt):
        if self.parser.is_variable(line):
            if not self.symbol_table.contains(self.parser.variable_value(line)):
                self.symbol_table.add(self.parser.variable_value(line),
                                      self.next_open_memory_address)
                self.__increment_next_open_memory_address()

    def __translate_line_to_binary(self, line, _):
        binary_command = self.binary_translator.translate(line)
        if binary_command:
            self.output_string += '{}\n'.format(binary_command)

    def __increment_next_open_memory_address(self):
        self.next_open_memory_address += 1
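# Minimal usage sketch for the assembler (assumes an 'Add.asm' Hack assembly file
# exists; SymbolTable, Parser and BinaryTraslator come from this project). The
# output is written next to the input as 'Add_Brayden.hack'.
def run_assembler_example():
    assembler = HackAssembler('Add.asm')
    assembler.compile()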