def test1():
    """Print each current character of a sample statement while stepping the lexer.

    The loop stops once ``peek()`` returns the NUL sentinel character.
    """
    lexer = Lexer("LET foobar = 123")
    while lexer.peek() != '\0':
        print(lexer.curChar)
        lexer.nextChar()
def __init__(self, prog):
    """Lex ``prog`` up front and store the result of the first ``next_token()`` call."""
    lexer = Lexer(prog)
    self.lexer = lexer
    self.tokens = lexer.lex()
    self.peeked, self.prev = False, None
    # next_token() is deliberately called before eof / parsing_declr are
    # assigned, matching the established initialisation order.
    self.tok = self.next_token()
    self.eof = False
    self.parsing_declr = False
def test2():
    """Print the first token, then the kind of every token up to (not including) EOF."""
    lexer = Lexer("+- */")
    current = lexer.getToken()
    print(current)
    while current.kind != TokenType.EOF:
        print(current.kind)
        current = lexer.getToken()
def __init__(self, source_code, productions, actions, gotos):
    """Store the parsing tables and create a fresh stack and lexer.

    Args:
        source_code: Text handed to ``Lexer``.
        productions: Stored unchanged on ``self.productions``.
        actions: Stored unchanged on ``self.actions``.
        gotos: Stored unchanged on ``self.gotos``.
    """
    self.productions, self.actions, self.gotos = productions, actions, gotos
    self.stack = Stack()
    self.lexer = Lexer(source_code)
    self.trees = []
    self._is_accepted = False
def main():
    """Read the Pascal file named on the command line and print its DOT text.

    The file content is passed through ``Lexer`` -> ``Parser`` ->
    ``ASTVisualizer`` and whatever ``gendot()`` returns is printed to stdout.
    """
    argparser = argparse.ArgumentParser(description="Generate an AST DOT file.")
    argparser.add_argument("fname", help="Pascal source file")
    args = argparser.parse_args()

    # Use a context manager so the source file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(args.fname, "r") as source_file:
        text = source_file.read()

    lexer = Lexer(text)
    parser = Parser(lexer)
    viz = ASTVisualizer(parser)
    content = viz.gendot()
    print(content)
def main():
    """Parse the source file given as the single command-line argument.

    Exits with an error message unless exactly one argument is supplied.
    """
    print("Teeny tiny compiler")

    arg_count = len(sys.argv)
    if arg_count != 2:
        sys.exit("Error: Compiler needs source file as argument")

    source_path = sys.argv[1]
    with open(source_path, 'r') as source_file:
        program_text = source_file.read()

    # Wire the lexer into the parser and run it from the top-level rule.
    parser = Parser(Lexer(program_text))
    parser.program()
    print('Parsing completed')
def __init__(self, fileName, cFile):
    # Build the parser state: load the grammar from fileName, build its DFA,
    # lex cFile, and fill self.actions with error / accept / shift / reduce
    # entries derived from the DFA transition table.
    #
    # fileName: path of the grammar file (read in full and passed to Grammar).
    # cFile:    path passed to Lexer together with cFile + ".lex".
    self.e = Error(fileName)
    self.nodeStack = []
    self.fileName = fileName
    self.grammar = Grammar(open(fileName).read())
    self.dfa = DFA(self.grammar)
    # The parse stack starts with a placeholder symbol paired with the DFA start state.
    self.stack = [("DummySymbol", self.dfa.startState)]
    self.lexer = Lexer(cFile, cFile + ".lex")
    # The bare string below is a no-op expression holding disabled code.
    """ This bit's going to go away soon self.lexed = self.lexer.lexed self.lexed = [x for x in self.lexed.split("\n") if x != ''] self.lexed = [eval(x) for x in self.lexed] """
    self.lexed = self.lexer.lexedList
    self.action = []
    self.terminals = self.grammar.terminals
    self.actions = {}
    # construct action table
    # Pass 1: default every (state, symbol) entry, plus the "$" column, to "error".
    # NOTE: ("error") is just the string "error" -- the parentheses do not make a tuple.
    for state in self.dfa.transitionTable:
        self.actions[state] = {}
        for on in self.dfa.transitionTable[state]:
            self.actions[state][on] = ("error")
        self.actions[state]["$"] = ("error")
    # Pass 2: overwrite the defaults with accept / shift / reduce entries.
    for state in self.dfa.transitionTable:
        # A state containing the completed DummyStart item accepts on "$".
        if "DummyStart -> " + self.grammar.startSymbol + " " + itemSeparator in state:
            if state not in self.actions:
                self.actions[state] = {"$" : ("accept")}
            else:
                self.actions[state]["$"] = ("accept")
        for transition in self.dfa.transitionTable[state]:
            actionState = self.dfa.goto(state, transition)
            # Shift: some item has the separator directly before this symbol
            # and the DFA has a target state for it.
            if any([itemSeparator + " " + transition in x for x in state]) and actionState is not None:
                if state not in self.actions:
                    self.actions[state] = {transition : ("shift", actionState)}
                else:
                    self.actions[state][transition] = ("shift", actionState)
            # Reduce: items that end with the separator, kept only when the
            # symbol is in the follow set of the item's left-hand side.
            # NOTE(review): a reduce written here replaces a shift stored just
            # above for the same (state, transition) -- confirm this is intended.
            if any([x[-1] == itemSeparator for x in state]):
                matches = [x for x in state if x[-1] == itemSeparator]
                matches = [x for x in matches if transition in self.grammar.getFollowSet(x.partition(" ")[0])]
                for match in matches:
                    if match.partition(" ")[0] != "DummyStart":
                        # Count of right-hand-side symbols, separator excluded.
                        reduceNum = len([x for x in match.partition(" -> ")[2].split(" ") if x != itemSeparator])
                        if state not in self.actions:
                            self.actions[state] = {transition : ("reduce", match.partition(" ")[0], transition, reduceNum)}
                        else:
                            self.actions[state][transition] = ("reduce", match.partition(" ")[0], transition, reduceNum)
def main():
    """Command-line entry point: parse, analyse and interpret a Pascal file.

    Options ``--scope`` and ``--stack`` set the module-level logging flags.
    Lexer/parser errors and semantic errors print their message and exit
    with status 1.
    """
    # Separate name for the CLI parser so it is not rebound to the Pascal Parser below.
    arg_parser = argparse.ArgumentParser(
        description="SPI - Simple Pascal Interpreter")
    arg_parser.add_argument("inputfile", help="Pascal source file")
    arg_parser.add_argument(
        "--scope",
        help="Print scope information",
        action="store_true",
    )
    arg_parser.add_argument(
        "--stack",
        help="Print call stack",
        action="store_true",
    )
    args = arg_parser.parse_args()

    global _SHOULD_LOG_SCOPE, _SHOULD_LOG_STACK
    _SHOULD_LOG_SCOPE, _SHOULD_LOG_STACK = args.scope, args.stack

    # Context manager guarantees the source file handle is closed.
    with open(args.inputfile, "r") as source_file:
        text = source_file.read()

    lexer = Lexer(text)
    try:
        parser = Parser(lexer)
        tree = parser.parse()
    except (LexerError, ParserError) as e:
        print(e.message)
        sys.exit(1)

    semantic_analyzer = SemanticAnalyzer(_SHOULD_LOG_SCOPE)
    try:
        semantic_analyzer.visit(tree)
    except SemanticError as e:
        print(e.message)
        sys.exit(1)

    interpreter = Interpreter()
    interpreter.interpret(tree)
def main():
    """Run the lexer, symbol-table, parser and code-generator stages in turn.

    Reads the source file named by the first command-line argument and writes
    ``tokens.txt``, ``symtable.json`` and ``output.c`` into the ``../dumps``
    directory next to this file.
    """
    print("Mini Java Compiler")
    if len(sys.argv) < 2:
        sys.exit("Error: Compiler needs source file as argument.")

    source_path = pathlib.Path(sys.argv[1]).resolve()
    with source_path.open(mode="r") as src:
        buffer = src.read()

    target_dir = pathlib.Path(__file__).parent / "../dumps"
    target_dir.mkdir(parents=True, exist_ok=True)

    # Stage 1: token dump.
    print(f"{'':-<50}\nLexer Test")
    lexer = Lexer(buffer)
    with (target_dir / "./tokens.txt").open("w") as out:
        print(f"{'Position':10}{'Stream':<10}{'Token name':20}{'Value':20}", file=out)
        for tok in lexer.tokens():
            print(tok, file=out)
    print("Lexing completed.")

    # Stage 2: symbol table dump.
    print(f"{'':-<50}\nSymbol Table Test")
    symtable = SymbolTable(lexer)
    with (target_dir / "./symtable.json").open("w") as out:
        json.dump(symtable.data, out, indent=4)
    print("Symbol table completed.")

    # Stage 3: parse.
    print(f"{'':-<50}\nParser Test")
    ast = Parser(lexer).program()
    print("Parsing completed.")

    # Stage 4: generate code and write it out.
    print(f"{'':-<50}\nCode Generator Test")
    code = CodeGen(ast).generate_code()
    with (target_dir / "./output.c").open("w") as out:
        print(code, file=out)
    print("Code generation completed.")
dot.edge(p_name, name) for n in node.params: see_node(name, n, dot) see_node(name, node.body, dot) for n in node.localvars: see_node(name, n, dot) elif type(node) == CompoundStmtNode: dot.node(name, str(node.kind)) dot.edge(p_name, name) for n in node.stmts: see_node(name, n, dot) elif type(node) == IfStmtNode: dot.node(name, str(node.kind)) dot.edge(p_name, name) see_node(name, node.cond, dot) see_node(name, node.then, dot) if not node.els is None: see_node(name, node.els, dot) if __name__ == '__main__': path = './t1.c' l = Lexer(filename(path), read_file(path)) l.lex() l.see_tokens() p = Parser(l.tokens) p.parse() see_ast(p.ast)
from lex import Lexer
from parse import Parser
from codegen import CodeGen

# Driver script: lex input.toy, parse the token stream, then emit IR.
fname = "input.toy"
with open(fname) as f:
    text_input = f.read()

lexer = Lexer().get_lexer()
tokens = lexer.lex(text_input)

codegen = CodeGen()
module, builder, printf = codegen.module, codegen.builder, codegen.printf

#~ pg = Parser(module, builder, printf)
pg1 = Parser()
pg1.parse()
parser = pg1.get_parser()
parser.parse(tokens)

codegen.create_ir()
#~ codegen.save_ir("output.ll")
def __init__(self, filename):
    """Open ``filename`` and attach a lexer that reads from the open file."""
    # The handle is kept on the instance and is not closed here.
    self.file = open(filename)
    self.pending = None
    source = self.file
    self.lexer = Lexer(source)
def scanner(file):
    """Print every lexeme produced for ``file`` until END_OF_INPUT is reached.

    The END_OF_INPUT lexeme itself is not printed.
    """
    lexer = Lexer(file)
    while True:
        lexeme = lexer.lex()
        if lexeme.ltype == "END_OF_INPUT":
            break
        print(lexeme)
import sys
import ply.yacc as yacc
from lex import Lexer

tokens = Lexer().tokens
replacement = ['JZERO', 'JNEG', 'JPOS', 'JUMP']
variables = {}  # contains var name and address
arrays = {}  # contains array name and its address
initialized_variables = set()  # contains already initialized vars
current_instruction = 0
temp_vars = set()
register_number = 1  # memory counter

########################################################################################################################
# Declaring 10 temporary registers
########################################################################################################################
# program -> DECLARE declarations BEGIN commands END
#          | BEGIN commands END
########################################################################################################################


# NOTE: the function docstrings below are the grammar rules read by PLY, so
# explanatory text lives in comments rather than in the docstrings.

# Program with a declaration section: the result is the first element of the
# `commands` value followed by a final HALT line.
def p_program_with_libs(p):
    'program : DECLARE declarations BEGIN commands END'
    p[0] = str(p[4][0]) + '\nHALT'


# Program without declarations. The value is coerced with str() exactly like
# the sibling rule above, so a non-str first element no longer raises TypeError.
def p_program_without_libs(p):
    'program : BEGIN commands END'
    p[0] = str(p[2][0]) + '\nHALT'
def __init__(self, **kwargs):
    """Reset counters, create the lexer and build the yacc parser for this object.

    Args:
        **kwargs: Forwarded unchanged to ``yacc.yacc``.
    """
    self.totalLines, self.result = 0, True
    self.lexer = Lexer()
    self.parser = yacc.yacc(module=self, **kwargs)
||| SUMMARY: ||| A simple lexical analyzer that uses an external, ||| simple-format, file for definitions and the respective ||| tokens. Input file can then be specified for checking ||| language against language definition. Output can be ||| presented via print() method or write(filename) method. """ from lex import Lexer yesAnswers = ["y", "yes", "YES", "Yes"] # Run loop by default while True: # Let user choose file for rule definitions ruleDef = input("Enter rules file for definitions: ") lex = Lexer(ruleDef) lex.setRules() # Let user choose input file to be analyzed inputFile = input("Choose code file to read: ") lex.analyze(inputFile) # User response to prompt for output type answer = input("Enter 'print' to output to console, otherwise enter output file name: ") if answer == "print": lex.print() else: lex.writeToFile(answer) # Break loop if user does not wish to read another file answer = input("Analyze another file? ")
def main():
    """Command-line driver: compile a Java source file to C and a native binary.

    Options are read from ``argv`` with ``getopt``. ``-c/--clean PATH`` removes
    previously generated artefacts under PATH and exits. Otherwise the source
    given by ``-i/--input`` is lexed, parsed, analysed and compiled, and the
    display flags (``-t``, ``-s``, ``-p``, ``-a``, ``-g``, or ``-v`` for all of
    them) select which intermediate results are shown afterwards.

    Any ``GetoptError`` (including the one raised for ``-h/--help``) prints the
    help text followed by the error message.
    """
    print("JCOSIM: Java Compiler Simulator")
    try:
        if len(argv) < 2:
            raise GetoptError('ERROR: Input file must be specified')

        options, _remainder = getopt(argv[1:], 'i:o:stuapgc:vh', [
            'input=',
            'output=',
            'symtable',
            'token',
            'use-gcc',
            # '--parsetree' is checked for below but was never registered,
            # so the long form was always rejected by getopt.
            'parsetree',
            'analyzedtree',
            # Kept for backward compatibility; no branch below handles it.
            'analy',
            'gencode',
            'clean=',
            'verbose',
            'help',
        ])

        source = None
        exe = None
        symtable = False
        token = False
        parsetree = False
        analyzedtree = False
        gencode = False
        clean = False
        clean_path = '.'
        cc = False

        for opt, arg in options:
            if opt in ('-h', '--help'):
                # An empty error message makes the handler print only the help text.
                raise GetoptError('')
            elif opt in ('-c', '--clean'):
                clean = True
                clean_path = arg
            elif opt in ('-i', '--input'):
                source = arg
            elif opt in ('-u', '--use-gcc'):
                cc = True
            elif opt in ('-o', '--output'):
                exe = arg
            elif opt in ('-s', '--symtable'):
                symtable = True
            elif opt in ('-t', '--token'):
                token = True
            elif opt in ('-p', '--parsetree'):
                parsetree = True
            elif opt in ('-a', '--analyzedtree'):
                analyzedtree = True
            elif opt in ('-g', '--gencode'):
                gencode = True
            elif opt in ('-v', '--verbose'):
                symtable = True
                token = True
                parsetree = True
                analyzedtree = True
                gencode = True

        # clean and exit
        if clean:
            if source:
                # Smartly get exe file name
                if not exe:
                    exe = Path(source).stem
            else:
                exe = 'Main'

            files = [
                'tokens.txt',
                'parsetree.png',
                'analyzedtree.png',
                'symtable.json',
                f'{exe}.c',
                f'{exe}.exe',
                f'{exe}',
                f'{exe}.o',
                f'{exe}.obj'
            ]
            section(*clean_display(files))
            for file in files:
                _path = Path(clean_path).joinpath(file).resolve()
                if _path.exists():
                    _path.unlink()
            exit()

        # No source no life
        if not source:
            raise GetoptError('ERROR: Input file must be specified')

        # Smartly get exe file name
        if not exe:
            exe = Path(source).stem

        # Read Java source file
        with open(source, 'r') as f:
            buffer = f.read()

        # Lexing
        lexer = Lexer(buffer)

        # Parsing
        parser = Parser(lexer)
        program_tree = parser.program()

        # Generate symbol table (the lexer is reset first because the parser
        # has already been run over it)
        lexer.reset()
        stb = SymbolTable(lexer)

        # Semantic
        semantic = Semantic(program_tree, stb)
        analyzed_tree = semantic.analyze()

        # Generate C code
        code_gen = CodeGen(analyzed_tree, stb)
        code = code_gen.generate_code()

        # Compile the code and output native binary
        section(*native_compile_display(code, exe, cc))

        # do things based on flags
        if token:
            section(*token_display(lexer))

        if symtable:
            section(*symtable_display(stb))

        if parsetree:
            section(*parsetree_display(program_tree, 'parsetree.png'))

        if analyzedtree:
            section(*parsetree_display(analyzed_tree, 'analyzedtree.png'))

        if gencode:
            section(*gencode_display(code, exe))

    except GetoptError as e:
        section(*help_text())
        print(e)
# Load the SLR action/goto tables, lex the input, then parse the token
# stream and report whether it is syntactically correct.
try:
    # check that SLR table can be read
    with open("slr_table.csv", "rt") as f:
        actions, gotos = loadTable(f)
        # printActions(actions)
except IOError:
    raise ERROR[5]  # Couldn't open SLR table file.

# in the beginning we will write the input...
# as a sequence of terminal symbols, ending by $
# the input will be the output of the lexical analyzer
output = []
try:
    lexer = Lexer(input_)
    tokens = {
        key: result[1].name.lower()  # result = (lexeme, token)
        for key, result in lexer.output.items()
    }
except Exception:
    # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
    # SystemExit are no longer reported as a lexical error.
    raise ERROR[3]  # Lexical error

tree = parse(tokens, grammar, actions, gotos)
if tree:
    print("Input is syntactically correct!")
    tree.print()
else:
    print("Code has syntax errors!")
import re
from lex import Lexer
from parse_qr import Parser

if __name__ == '__main__':
    # Build the lexer, feed it the contents of test.txt, then parse the
    # same text with that lexer.
    lexer = Lexer().build()
    # Context manager closes the file even if read() raises.
    with open('test.txt') as file:
        text_input = file.read()
    lexer.input(text_input)
    # while True:
    #     tok = lexer.token()
    #     if not tok: break
    #     print(tok)
    parser = Parser()
    parser.build().parse(text_input, lexer, False)
# Numeric precedence level for each operator, passed to Parser below.
precedence = {
    '||': 5, '&&': 5,
    '>': 6, '>=': 6, '<=': 6, '<': 6, '==': 6, '!=': 6,
    '+': 7, '-': 7,
    '*': 8, '/': 8,
    'UMINUS': 9,
}

# Token list for the sample expression.
tokens = list(Lexer('1+-10*123').lex())

# Named precedence entries mapped to the production shape they apply to.
precs = {
    'UMINUS': ['E', '-', 'E'],
    'POSITIVE': ['E', '+', 'E'],
}
# for t in tokens:
#     print(t)
# print(sm.productions())
# parser = Parser(productions, terminal, nonterminal)
parser = Parser(
    sm.productions,
    sm.terminal,
    sm.nonterminal,
    precedence=precedence,
    precs=precs,
)
parser.generate()
parser.parse(tokens, sm.sdmap)
print(calls)
class Parser:
    # PLY (yacc) grammar for a small C-like language.
    #
    # Every ``p_*`` method's docstring IS the grammar rule consumed by PLY, so
    # those docstrings must not be reworded; each action only prints the rule
    # it reduced. The order of the methods is kept because the first rule
    # (``program``) is the one listed first.
    tokens = Lexer().tokens

    def __init__(self):
        pass

    def p_program(self, p):
        """program : declist MAIN LRB RRB block"""
        print("program : declist MAIN LRB RRB block" )  # exactly as in the provided file

    def p_decli_version1(self, p):
        """declist : dec"""
        print("declist : dec ")  # type1

    def p_decli_version2(self, p):
        """declist : declist dec"""
        print("declist : declist ")  # type2

    def p_decli_version3(self, p):
        """declist : empty"""
        print("declist : empty ")  # type3

    def p_decversion1(self, p):
        """dec : vardec """
        print("""dec : vardec """)

    def p_decversion2(self, p):
        """dec : funcdec """
        print("""dec : funcdec """)

    def p_type1(self, p):
        """type : INTEGER"""
        print("""type : INTEGERNUMBER""")

    def p_type2(self, p):
        """type : FLOAT"""
        print("""type : FLOAT""")

    def p_type3(self, p):
        """type : BOOLEAN"""
        print("""type : BOOLEAN""")

    def p_iddec_version1(self, p):
        """iddec : ID"""
        print("""iddec : ID""")

    def p_iddec1_version2(self, p):
        """iddec : ID LSB exp RSB"""
        print("""ID LSB exp RSB""")

    def p_iddec1_version3(self, p):
        """iddec : ID ASSIGN exp"""
        print("""iddec : ID ASSIGN exp""")

    def p_idlist_version1(self, p):
        """idlist : iddec"""
        print("""idlist : iddec""")

    def p_idlist_version2(self, p):
        """idlist : idlist COMMA iddec"""
        print("""idlist : idlist COMMA iddec""")

    def p_vardec(self, p):
        """vardec : type idlist SEMICOLON"""
        print("""vardec : type idlist SEMICOLON""")
        # NOTE(review): the original note here appears to say the SEMICOLON
        # had also been producing an error -- confirm whether still relevant.

    def p_funcdec_vesrion1(self, p):
        """funcdec : type ID LRB paramdecs RRB block"""
        print("""funcdec : type ID LRB paramdecs RRB block""")

    def p_funcdec_version2(self, p):
        """funcdec : VOID ID LRB paramdecs RRB block"""
        print("""funcdec : VOID ID LRB paramdecs RRB block""")

    def p_paramdecs_vesrion1(self, p):
        """paramdecs : paramdecslist"""
        print("""paramdecs : paramdecslist""")

    def p_paramdecs_version2(self, p):
        """paramdecs : empty"""
        print("""paramdecs : empty""")

    def p_paramdecslist_vesrion1(self, p):
        """paramdecslist : paramdec"""
        print("""paramdecslist : paramdec""")

    def p_paramdecslist_vesrion2(self, p):
        """paramdecslist : paramdecslist COMMA paramdec"""
        print("""paramdecslist : paramdecslist COMMA paramdec""")

    def p_paramdec_vesrion1(self, p):
        """paramdec : type ID"""
        print("""paramdec : type ID""")

    def p_paramdec_version2(self, p):
        """paramdec : type ID LSB RSB"""
        print("""paramdec : type ID LSB RSB""")

    def p_varlist_version1(self, p):
        """varlist : vardec"""
        print("""varlist : vardec""")

    def p_varlist_version2(self, p):
        """varlist : varlist vardec"""
        print("""varlist : varlist vardec """)

    def p_varlist_version3(self, p):
        """varlist : empty"""
        print("""varlist : empty""")

    def p_block(self, p):
        """block : LCB varlist stmtlist RCB"""
        print("block : LCB varlist stmtlist RCB")

    def p_stmtlist_version1(self, p):
        """stmtlist : stmt"""
        print("""stmtlist : stmt""")

    def p_stmtlist_version2(self, p):
        """stmtlist : stmtlist stmt"""
        print("""stmtlist : stmtlist stmt""")

    def p_stmtlist_version3(self, p):
        """stmtlist : empty"""
        print("""stmtlist : empty""")

    def p_lvalue_version1(self, p):
        """lvalue : ID"""
        print("""lvalue : ID""")

    def p_lvalue_version2(self, p):
        """lvalue : ID LSB exp RSB"""
        print("""lvalue : ID LSB exp RSB""")

    def p_stmt_version1(self, p):
        """stmt : matched_stmt"""
        print("""stmt : matched_stmt""" )
        # Probably the most important part: statements are split into
        # matched and unmatched forms.

    def p_stmt_version2(self, p):
        """stmt : unmatched_stmt"""
        print("""stmt : unmatched_stmt""")

    def p_stmt_matched_version1(self, p):
        """matched_stmt : IF LRB exp RRB matched_stmt elseiflist ELSE matched_stmt %prec IF_D"""
        print(
            """matched_stmt : IF LRB exp RRB matched_stmt elseiflist ELSE matched_stmt"""
        )
        # NOTE(review): the original note here appears to say the IF was not
        # being accepted -- confirm whether still relevant.

    def p_stmt_matched_version2(self, p):
        """matched_stmt : everythingthatcanhappen"""
        print("""matched_stmt : everythingthatcanhappen(other)""")

    def p_stmt_unmatched_version1(self, p):
        """unmatched_stmt : IF LRB exp RRB matched_stmt elseiflist %prec IF_D"""
        print("""unmatched_stmt : IF LRB exp RRB stmt elseiflist""")

    def p_stmt_unmatched_version2(self, p):
        """unmatched_stmt : IF LRB exp RRB matched_stmt elseiflist ELSE unmatched_stmt %prec IF_D"""
        print(
            """unmatched_stmt : IF LRB exp RRB matched_stmt elseiflist ELSE unmatched_stmt"""
        )

    def p_stmt_others_version1(self, p):
        """everythingthatcanhappen : RETURN exp SEMICOLON"""
        print("""everythingthatcanhappen : RETURN exp SEMICOLON""")

    def p_stmt_others_version2(self, p):
        """everythingthatcanhappen : exp SEMICOLON"""
        print("""everythingthatcanhappen : exp SEMICOLON""")

    def p_stmt_others_version3(self, p):
        """everythingthatcanhappen : block"""
        print("""everythingthatcanhappen : block""")

    def p_stmt_others_version4(self, p):
        """everythingthatcanhappen : WHILE LRB exp RRB stmt"""
        print(""" WHILE LRB exp RRB stmt""")

    def p_stmt_others_version5(self, p):
        """everythingthatcanhappen : FOR LRB exp SEMICOLON exp SEMICOLON exp RRB stmt"""
        print(
            """everythingthatcanhappen : FOR LRB exp SEMICOLON exp SEMICOLON exp RRB stmt"""
        )

    def p_stmt_others_version6(self, p):
        """everythingthatcanhappen : PRINT LRB ID RRB SEMICOLON"""
        print("""everythingthatcanhappen : PRINT LRB ID RRB SEMICOLON""")

    def p_elseiflist_version1(self, p):
        """elseiflist : ELIF LRB exp RRB stmt"""
        print("""elseiflist : ELIF LRB exp RRB stmt""")

    def p_elseiflist_version2(self, p):
        """elseiflist : elseiflist ELIF LRB exp RRB stmt"""
        print("""elseiflist : elseiflist ELIF LRB exp RRB stmt""")

    def p_elseiflist_version3(self, p):
        """elseiflist : empty"""
        print("""elseiflist : empty""")

    def p_exp_version1(self, p):
        """exp : lvalue ASSIGN exp"""
        print("""exp : lvalue ASSIGN exp""")

    def p_exp_version2(self, p):
        """exp : exp operator exp %prec OP"""
        print("""exp : exp operator exp""")

    def p_exp_version3(self, p):
        """exp : exp relop exp %prec RELOP"""
        print("""exp : exp relop exp""")

    def p_exp_version5(self, p):
        """exp : const"""
        print("""exp : const""")

    def p_exp_version6(self, p):
        """exp : lvalue"""
        print("""exp : lvalue """)

    def p_exp_version7(self, p):
        """exp : ID LRB explist RRB"""
        print("""exp : ID LRB explist RRB""")

    def p_exp_version8(self, p):
        """exp : LRB exp RRB"""
        print("""exp : LRB exp RRB""")

    def p_exp_version9(self, p):
        """exp : ID LRB RRB"""
        print("""exp : ID LRB RRB""")

    def p_exp_version10(self, p):
        """exp : SUB exp %prec UMINUS"""
        print("""exp : SUB exp""")

    def p_exp_version11(self, p):
        """exp : NOT exp"""
        print("""exp : NOT exp""")

    def p_operator_version1(self, p):
        """operator : OR"""
        print("""operator : OR""")

    def p_operator_version2(self, p):
        """operator : AND"""
        print("""operator : AND""")

    def p_operator_version3(self, p):
        """operator : SUM"""
        print("""operator : SUM""")

    def p_operator_version4(self, p):
        """operator : SUB"""
        print("""operator : SUB""")

    # def p_operator_version5(self, p):
    #     """operator : MUL"""
    #     print("""operator : MUL""")

    def p_operator_version6(self, p):
        """operator : MUL"""
        print("""operator : MUL""")

    def p_operator_version7(self, p):
        """operator : DIV"""
        print("""operator : DIV""")

    def p_operator_version8(self, p):
        """operator : MOD"""
        print("""operator : MOD""")

    def p_const_version1(self, p):
        """const : INTEGERNUMBER"""
        print("""const : INTEGERNUMBER""")

    def p_const_version2(self, p):
        """const : FLOATNUMBER"""
        print("""const : FLOATNUMBER""")

    def p_const_version3(self, p):
        """const : TRUE"""
        print("""const : TRUE""")

    def p_const_version4(self, p):
        """const : FALSE"""
        print("""const : FALSE""")

    def p_relop_version1(self, p):
        """relop : GT"""
        print("""relop : GT""")

    def p_relop_version2(self, p):
        """relop : LT"""
        print("""relop : LT""")

    def p_relop_version3(self, p):
        """relop : NE"""
        print("""relop : NE""")

    def p_relop_version4(self, p):
        """relop : EQ"""
        print("""relop : EQ""")

    def p_relop_version5(self, p):
        """relop : LE"""
        print("""relop : LE""")

    def p_relop_version6(self, p):
        """relop : GE"""
        print("""relop : GE""")

    def p_explist_version1(self, p):
        """explist : exp"""
        print("""explist : exp""")

    def p_explist_version2(self, p):
        """explist : explist COMMA exp"""
        print("""explist: explist COMMA exp""")

    def p_empty(self, p):
        """empty : %prec EMPTY"""
        pass

    # Precedence table: entries listed later have higher precedence.
    # EMPTY, IF_D, RELOP, OP and UMINUS are names used only via %prec in the
    # rules above.
    precedence = (
        ('nonassoc', 'EMPTY'),
        ('nonassoc', 'IF_D'),
        ('nonassoc', 'ELIF'),
        ('nonassoc', 'ELSE'),
        ('nonassoc', 'RELOP'),
        ('nonassoc', 'OP'),
        ('left', 'COMMA'),
        ('right', 'ASSIGN'),
        ('left', 'OR'),
        ('left', 'AND'),
        ('left', 'EQ', 'NE'),
        ('left', 'GT', 'LT', 'GE', 'LE'),
        ('left', 'SUM', 'SUB'),
        ('left', 'MUL', 'DIV', 'MOD'),
        ('right', 'NOT'),
        ('right', 'UMINUS'))

    def p_error(self, p):
        # PLY passes None when the error is an unexpected end of input, so
        # only print the offending value when there is a token to inspect.
        # The raised exception (type and arguments) is the same in both cases.
        if p is not None:
            print(p.value)
        raise Exception('ParsingError: invalid grammar at ', p)

    def build(self, **kwargs):
        """build the parser"""
        self.parser = yacc.yacc(module=self, **kwargs)
        return self.parser