def __init__(self, input_file):
    """Wire up the compiler front-end for one source file.

    Builds the scanner, semantic analyzer and code generator, seeds the
    parse tree with the grammar's start symbol, and resolves the output
    file locations relative to the script directory.
    """
    # A relative path is interpreted relative to the script's own directory.
    if not os.path.isabs(input_file):
        input_file = os.path.join(script_dir, input_file)

    self.scanner = Scanner(input_file)
    self.semantic_analyzer = SemanticAnalyser()
    self.code_generator = CodeGen()
    self._syntax_errors = []

    # Parse tree rooted at the start symbol; the stack bottom is the "$"
    # end marker so the driver knows when input should be exhausted.
    self.root = Node("Program")  # Start symbol
    self.parse_tree = self.root
    self.stack = [Node("$"), self.root]

    output_dir = os.path.join(script_dir, "output")
    errors_dir = os.path.join(script_dir, "errors")
    self.parse_tree_file = os.path.join(output_dir, "parse_tree.txt")
    self.syntax_error_file = os.path.join(errors_dir, "syntax_errors.txt")
def main():
    """Run the 27 code-generation sample tests and log a pass/fail summary.

    For each case T{i} the full pipeline runs (scan, parse, generate code,
    export to output.txt), the generated program is executed via the
    platform-specific command, and its output is compared against the
    recorded expected output.
    """
    number_of_tests = 27
    test_passes = True
    status = ""
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    for i in range(1, number_of_tests + 1):
        prefix = f"tests/code_gen/samples/T{i}/"
        sc = build_scanner(f"{prefix}input.txt")
        parser = LL1(sc, grammar, CodeGen())
        # Reset the shared global tables so cases don't leak into each other,
        # then pre-register the built-in "output" symbol at address 5.
        tables.get_token_table().tokens = []
        tables.get_symbol_table().clear()
        tables.symbol_table.add_symbol(Token(TokenType.ID, "output"))
        tables.symbol_table.fetch("output").address = 5
        tables.get_error_table().parse_trees = []
        parser.generate_parse_tree()
        parser.code_gen.execute_from("main")
        parser.export_code("output.txt")
        # Run the generated program with the interpreter command for this OS.
        os.system(test_command[platform.system()])
        # Read each comparison file exactly once, closing the handle promptly
        # (previously every file was opened twice and never closed).
        with open("expected.txt") as f:
            actual = f.read().strip()
        with open(f"{prefix}expected.txt") as f:
            expected = f.read().strip()
        test_status = actual == expected
        logger.warning(f"test no.{i}:")
        logger.warning(f"\texpected.txt:\t{test_status}")
        test_passes = test_passes and test_status
        status += ("F", ".")[test_status]
    logger.warning("".ljust(60, "="))
    logger.warning(status)
    logger.warning(("test failed", "test was successful!")[test_passes])
def compile_module_stream(name, stream):
    """ Compile one module from source stream """
    out_debug('Compiling: %s', name)
    with stream:
        # Front end: tokenize the stream's lines and build the AST.
        lines = compatibility.file_readlines(stream)
        ast = Parser(Tokenizer(lines)).parse_to_ast()
        # Back end: emit intermediate code, with builtin definitions loaded.
        generator = CodeGen(ast, name)
        generator.load_module_defs('$builtin')
        return generator.generate()
def main():
    """Run the 10 parser sample tests and log a pass/fail summary.

    Each case T{i} is scanned and parsed; the exported parse tree and
    syntax-error report are compared (errors case-insensitively) against
    the recorded expected files.
    """
    number_of_tests = 10
    test_passes = True
    status = ""
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    for i in range(1, number_of_tests + 1):
        prefix = f"tests/parser/samples/T{i}/"
        sc = build_scanner(f"{prefix}input.txt")
        # NOTE: "grammer" is the (misspelled) module-level name this file uses.
        parser = LL1(sc, grammer, CodeGen())
        # Reset the shared global tables so cases don't leak into each other.
        tables.get_token_table().tokens = []
        tables.get_symbol_table().clear()
        tables.get_error_table().parse_trees = []
        parser.generate_parse_tree()
        parser.export_parse_tree('parse_tree.txt')
        parser.export_syntax_error('syntax_errors.txt')
        # Read each comparison file exactly once, closing the handle promptly
        # (previously every file was opened twice and never closed).
        with open('parse_tree.txt') as f:
            actual_tree = f.read().strip()
        with open(f'{prefix}parse_tree.txt') as f:
            expected_tree = f.read().strip()
        with open('syntax_errors.txt') as f:
            actual_errors = f.read().strip().lower()
        with open(f'{prefix}syntax_errors.txt') as f:
            expected_errors = f.read().strip().lower()
        tree_ok = actual_tree == expected_tree
        errors_ok = actual_errors == expected_errors
        logger.warning(f"test no.{i}:")
        logger.warning(f"\tparse_tree.txt:\t{tree_ok}")
        logger.warning(f"\tsyntax_errors.txt:\t{errors_ok}")
        test_status = tree_ok and errors_ok
        test_passes = test_passes and test_status
        status += ("F", ".")[test_status]
    logger.warning("".ljust(60, "="))
    logger.warning(status)
    logger.warning(("test failed", "test was successful!")[test_passes])
class Parser(object):
    """Table-driven LL(1) parser.

    Drives the scanner token-by-token, builds an anytree parse tree,
    records panic-mode syntax errors, and dispatches "#SA..."/"#CG..."
    action symbols to the semantic analyzer and code generator.
    """

    def __init__(self, input_file):
        """Set up the pipeline for *input_file* (resolved against script_dir)."""
        if not os.path.isabs(input_file):
            input_file = os.path.join(script_dir, input_file)
        self.scanner = Scanner(input_file)
        self.semantic_analyzer = SemanticAnalyser()
        self.code_generator = CodeGen()
        self._syntax_errors = []
        self.root = Node("Program")  # Start symbol
        self.parse_tree = self.root
        # Stack bottom is the "$" end marker; start symbol sits on top.
        self.stack = [Node("$"), self.root]
        self.parse_tree_file = os.path.join(script_dir, "output", "parse_tree.txt")
        self.syntax_error_file = os.path.join(script_dir, "errors", "syntax_errors.txt")

    @property
    def syntax_errors(self):
        """Collected syntax errors rendered as a report string (one per line)."""
        syntax_errors = []
        if self._syntax_errors:
            for lineno, error in self._syntax_errors:
                syntax_errors.append(f"#{lineno} : Syntax Error! {error}\n")
        else:
            syntax_errors.append("There is no syntax error.\n")
        return "".join(syntax_errors)

    def save_parse_tree(self):
        """Write the rendered parse tree to self.parse_tree_file."""
        with open(self.parse_tree_file, "w", encoding="utf-8") as f:
            for pre, _, node in RenderTree(self.parse_tree):
                # Matched terminals carry a .token attribute; print that
                # instead of the grammar-symbol name.
                if hasattr(node, "token"):
                    f.write(f"{pre}{node.token}\n")
                else:
                    f.write(f"{pre}{node.name}\n")

    def save_syntax_errors(self):
        """Write the syntax-error report to self.syntax_error_file."""
        with open(self.syntax_error_file, "w") as f:
            f.write(self.syntax_errors)

    def _remove_node(self, node):
        """Detach *node* from the parse tree (no-op for the root)."""
        try:
            # remove node from the parse tree; index 1 of the reverse path
            # is the immediate parent (index 0 is the node itself).
            parent = list(node.iter_path_reverse())[1]
            parent.children = [c for c in parent.children if c != node]
        except IndexError:
            pass  # node is the root: nothing to detach

    def _clean_up_tree(self):
        ''' remove non terminals and unmet terminals from leaf nodes '''
        # Collect first, then remove — deleting while iterating PreOrderIter
        # would invalidate the traversal.
        remove_nodes = []
        for node in PreOrderIter(self.parse_tree):
            if not node.children and not hasattr(
                    node, "token") and node.name != "EPSILON":
                remove_nodes.append(node)
        for node in remove_nodes:
            self._remove_node(node)

    def parse(self):
        """Run the LL(1) parse loop over the whole token stream.

        Emits INIT_PROGRAM / FINISH_PROGRAM code-gen events around the
        parse, performs panic-mode error recovery (recording errors in
        self._syntax_errors), and runs the analyzer's EOF check at the end.
        """
        clean_up_needed = False
        token = self.scanner.get_next_token()
        new_nodes = []
        self.code_generator.code_gen("INIT_PROGRAM", None)
        while True:
            token_type, a = token
            if token_type in (
                    "ID", "NUM"
            ):  # parser won't understand the lexim input in this case
                a = token_type
            current_node = self.stack[-1]  # check the top of the stack
            X = current_node.name
            if X.startswith(
                    "#SA"):  # X is an action symbol for semantic analyzer
                # Capture the lexeme before the semantic check may mutate
                # the symbol table on scope exit.
                if X == "#SA_DEC_SCOPE" and a == "ID":
                    curr_lexim = self.scanner.id_to_lexim(token[1])
                self.semantic_analyzer.semantic_check(X, token,
                                                      self.scanner.line_number)
                self.stack.pop()
                # Re-resolve the ID against the updated symbol table.
                if X == "#SA_DEC_SCOPE" and a == "ID":
                    token = (token[0],
                             self.scanner.update_symbol_table(curr_lexim))
            elif X.startswith(
                    "#CG"):  # X is an action symbol for code generator
                self.code_generator.code_gen(X, token)
                self.stack.pop()
            elif X in terminal_to_col.keys():  # X is a terminal
                if X == a:
                    if X == "$":
                        break  # input and stack exhausted together: done
                    self.stack[-1].token = self.scanner.token_to_str(token)
                    self.stack.pop()
                    token = self.scanner.get_next_token()
                else:
                    # Terminal mismatch: report "Missing X" and drop it.
                    SymbolTableManager.error_flag = True
                    if X == "$":  # parse stack unexpectedly exhausted
                        # self._clean_up_tree()
                        break
                    self._syntax_errors.append(
                        (self.scanner.line_number, f'Missing "{X}"'))
                    self.stack.pop()
                    clean_up_needed = True
            else:  # X is non-terminal
                # look up parsing table which production to use
                col = terminal_to_col[a]
                row = non_terminal_to_row[X]
                prod_idx = parsing_table[row][col]
                rhs = productions[prod_idx]
                if "SYNCH" in rhs:
                    # Synchronizing entry: pop the non-terminal and report
                    # the construct it would have produced as missing.
                    SymbolTableManager.error_flag = True
                    if a == "$":
                        self._syntax_errors.append(
                            (self.scanner.line_number, "Unexpected EndOfFile"))
                        # self._clean_up_tree()
                        clean_up_needed = True
                        break
                    missing_construct = non_terminal_to_missing_construct[X]
                    self._syntax_errors.append(
                        (self.scanner.line_number,
                         f'Missing "{missing_construct}"'))
                    self._remove_node(current_node)
                    self.stack.pop()
                elif "EMPTY" in rhs:
                    # Empty table entry: the lookahead is illegal here —
                    # skip it (panic mode) and try again.
                    SymbolTableManager.error_flag = True
                    self._syntax_errors.append(
                        (self.scanner.line_number, f'Illegal "{a}"'))
                    token = self.scanner.get_next_token()
                else:
                    # Expand X -> rhs: grammar symbols become tree children,
                    # action symbols ("#...") stay out of the tree.
                    self.stack.pop()
                    for symbol in rhs:
                        if not symbol.startswith("#"):
                            new_nodes.append(Node(symbol, parent=current_node))
                        else:
                            new_nodes.append(Node(symbol))
                    # Push in reverse so the leftmost symbol ends up on top.
                    for node in reversed(new_nodes):
                        if node.name != "EPSILON":
                            self.stack.append(node)
            # print(f"{X} -> {' '.join(rhs)}")  # prints out the productions used
            new_nodes = []
        self.semantic_analyzer.eof_check(self.scanner.line_number)
        if clean_up_needed:
            self._clean_up_tree()
        self.code_generator.code_gen("FINISH_PROGRAM", None)
#!python3
import os
import json

from code_gen import CodeGen

if __name__ == '__main__':
    # Location of the generator configuration, relative to the working dir.
    path = os.path.join('../resources', 'config.json')
    # Parse the config with the handle closed deterministically
    # (previously the file object from open() was never closed).
    with open(path, 'r') as config_file:
        config = json.load(config_file)
    codeGen = CodeGen(config['tables'])
    codeGen.gen_code()
"""Compiler driver: scan, parse, generate and export code for input.txt."""
from Parser import init_grammar
from Parser.parser import LL1
from code_gen import CodeGen
from scanner.default_scanner import build_scanner
from scanner.tokens import Token, TokenType
from tables import tables

# Arshia Akhavan 97110422
# Ghazal Shenavar 97101897

# Pre-register the "output" symbol at address 5 before parsing —
# presumably the runtime's built-in print routine; verify against code_gen.
tables.symbol_table.add_symbol(Token(TokenType.ID, "output"))
tables.symbol_table.fetch("output").address = 5

parser = LL1(build_scanner("input.txt"), init_grammar(), CodeGen())
parser.generate_parse_tree()
parser.export_parse_tree("parse_tree.txt")
# Finalize code generation starting from "main", then export everything.
parser.code_gen.execute_from("main")
parser.export_code("output.txt")
parser.export_syntax_error("syntax_errors.txt")
tables.get_error_table().export("lexical_errors.txt")
# tables.get_symbol_table().export("symbol_table.txt")
tables.get_token_table().export("tokens.txt")
"""Compile input.toy to LLVM IR and write it to output.ll."""
from lexer import Lexer
from parser import Parser
from code_gen import CodeGen

filename = 'input.toy'

# Read the whole source file up front.
with open(filename) as input_file:
    text_input = input_file.read()

# Tokenize the source text.
tokens = Lexer().get_lexer().lex(text_input)

# The code generator owns the LLVM module/builder/printf handles that the
# parser's semantic actions write into.
code_gen = CodeGen()
parser_gen = Parser(code_gen.module, code_gen.builder, code_gen.printf)
parser_gen.parse()

# Build the parser, run it over the token stream, and evaluate the result
# (evaluation emits the IR through the shared builder).
parser_gen.get_parser().parse(tokens).eval()

code_gen.create_ir()
code_gen.save_ir('output.ll')