def test_while(self):
    self.assertEqual(lexer("<(_ _<(_ _<(_ a _)>_ _)>_ _)>"),
                     [{'toktype': 14, 'value': None},
                      {'toktype': 1, 'value': 'a'},
                      {'toktype': 15, 'value': None}])
    self.assertEqual(lexer("<(_ _<(_ _<(_ a _)>_ _)>_ _)> ʢᵕᴗᵕʡ"),
                     [{'toktype': 14, 'value': None},
                      {'toktype': 1, 'value': 'a'},
                      {'toktype': 15, 'value': None},
                      {'toktype': 16, 'value': None}])
def load_cpp(file_name):
    tokens = []
    with open(file_name, "r") as file:
        for line in file:
            tokens += lexer(line)
            print(line)
            for t in lexer(line):
                print(t)
            print('_' * 20)
    return tokens
def test_more(self):
    self.assertEqual(lexer("ʕ'ᴥ'ʔ"), [{'toktype': 21, 'value': None}])
    self.assertEqual(lexer("2 ʕ'ᴥ'ʔ 9"),
                     [{'toktype': 3, 'value': 2},
                      {'toktype': 21, 'value': None},
                      {'toktype': 3, 'value': 9}])
def outputCode(fileList, stronglinking, customOutDir, vmfinaloutput):
    import os
    # Set parse n and set up SymbolTable/modules that depend on it
    backEnd.setGlobals(parseNum=1)
    vars_, classAndFns = backEnd.SymbolTable.initialize()
    backEnd.processCode.initializeHashTables(vars_, classAndFns)
    backEnd.returnSemantics.initialize(classAndFns)

    # Stub out our output (as in I/O) to do nothing on initial parse
    output = backEnd.SetupOutput('initial')
    backEnd.processCode.defineOutput(output)
    parser.initializeGlobals(output, classAndFns, vars_)

    print('Doing initial parse of:')
    # Initial parse; fleshes out hash-tables, so that we have relevant
    # typing/function prototype (&c) information, for the output stage \/
    for fileName in fileList:
        print(fileName)
        tokenGenerator = lexer(fileName)
        globalVars.defineGlobalInputFile(fileName)
        parser.parseClass(tokenGenerator)

    # Second parse + code output \/
    backEnd.setGlobals(parseNum=2)
    for fileName in fileList:
        if customOutDir:
            # We've specified a custom directory path for output.
            # Files are still INPUT_FILE_PREFIX.jack,
            base = os.path.basename(fileName)[:-5] + '.vm'
            outFileName = os.path.join(customOutDir, base)
        else:
            outFileName = fileName[:-5] + '.vm'
        globalVars.defineGlobalInputFile(fileName)
        tokenGenerator = lexer(fileName)
        # \/ Make output (I/O) object actually write out for 2nd parse
        output.defineOutputValues('codeOutput', outFileName)
        parser.parseClass(tokenGenerator)
        if vmfinaloutput is True:
            # \/ We only output file names if we're keeping output files. If
            # this is false, VM files are just a step toward full conversion
            # (i.e. we're using JackCC as a 1st stage for further processing)
            print('Output: %s' % outFileName)
        output.closeFile()
def run(source_file, **kwargs):
    file_handler = open(source_file, 'r')
    source = file_handler.read()
    tokens = lexer.lexer(source)
    if kwargs["strip_comments"]:
        tokens = list(filter(lambda x: x["type"] != "COMMENT", tokens))
        tokens = list(filter(lambda x: x["type"] != "MULTILINE-COMMENT", tokens))
    if kwargs["tokens"]:
        pprint(tokens)
        return
    ast_ = parser.parse(tokens)
    if kwargs["ast"]:
        pprint(ast_)
        return
    py_ast = py_parser.ast_to_py_ast(ast_)
    if kwargs["py_ast"]:
        print(ast.dump(py_ast))
        return
    if kwargs["py_code"]:
        print(astunparse.unparse(py_ast))
        return
    exec(compile(py_ast, filename="<ast>", mode="exec"))
def test_equasion(self):
    self.assertEqual(lexer("12 ❤ {3.5 ✿ 4} ๑ 7 ✰ 3"),
                     [{'toktype': 3, 'value': 12},
                      {'toktype': 8, 'value': None},
                      {'toktype': 11, 'value': None},
                      {'toktype': 4, 'value': 3.5},
                      {'toktype': 7, 'value': None},
                      {'toktype': 3, 'value': 4},
                      {'toktype': 12, 'value': None},
                      {'toktype': 10, 'value': None},
                      {'toktype': 3, 'value': 7},
                      {'toktype': 9, 'value': None},
                      {'toktype': 3, 'value': 3}])
def main(): content = "" #creates a content variable with open('test.tnt', 'r') as file: #opens the test.tnt file to observe its code content = file.read() #reads the code in the test.tnt file lex = lexer.lexer(content) #initialize the lexer function tokens = lex.tokenize() #uses the tokenize function to create tokens
def parseEquations(self, files, lazy=False):
    equations = []
    for file in files:
        fileobj = open(file, "r")
        lex = lexer.lexer()
        for line in fileobj:
            line = line.replace("\n", "")
            line = line.replace(" ", "")
            rule = line.split(':')
            if (len(rule) != 2):
                continue
            try:
                if (rule[0][0] == '{' and rule[0][-1] == '}'):
                    lex.addVariable(rule[0], lex.statement(rule[1]))
                    continue
                if (not lazy):
                    p = lex.preParse(rule[1])
                    if (p == None):
                        continue
                    rule[1] = lex.extraParen(p.toString())
                rootToken = lex.statement(rule[1])
                equations.append((rule[0], rootToken))
            except ValueError as E:
                print("Error lexing rule '" + str(rule[0]) + "'")
                print("Error returned: " + str(E))
        fileobj.close()
    return equations
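# Hypothetical illustration (not from the source): judging from parseEquations() above,
# each input line is expected to look like "name:expression", whitespace is discarded,
# and a left-hand side wrapped in braces defines a reusable variable via lex.addVariable().
# The file contents and the splitting demo below are assumptions inferred from that code,
# not the project's documented rule format.
EXAMPLE_RULES = """\
{g}:9.81
force:mass*{g}
energy:0.5*mass*velocity*velocity
"""

for line in EXAMPLE_RULES.splitlines():
    rule = line.replace(" ", "").split(':')
    if len(rule) == 2:
        kind = "variable" if rule[0].startswith('{') and rule[0].endswith('}') else "equation"
        print(kind, rule[0], "->", rule[1])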
def __init__(self, filename):
    self.lexer = lexer.lexer(filename)
    self.global_namespace = {}
    self.current_block = None
    self.break_label = None
    self.continue_label = None
    self.parse_map = {
        kw_static: self.parse_static,
        kw_return: self.parse_return,
        kw_for: self.parse_for,
        kw_do: self.parse_do,
        kw_if: self.parse_if,
        kw_while: self.parse_while,
        kw_goto: self.parse_goto,
        kw_break: self.parse_break,
        kw_continue: self.parse_continue,
        kw_switch: self.parse_switch,
        kw_case: self.parse_case,
        kw_default: self.parse_default,
    }
    for t in type_names:
        self.parse_map[t] = self.parse_type_decl
        pass
    try:
        while not self.lexer.peek_token(tok_eof):
            self.parse_global_var_or_proc()
            pass
        pass
    except (lexer.lex_error, parse_error), msg:
        self.error(msg)
        pass
def outputTokens(fileList):
    # Set parse n and set up SymbolTable/modules that depend on it
    backEnd.setGlobals(parseNum=0)
    vars_, classAndFns = backEnd.SymbolTable.initialize()
    backEnd.processCode.initializeHashTables(vars_, classAndFns)
    backEnd.returnSemantics.initialize(classAndFns)

    # Set up the `o` part of I/O
    output = backEnd.SetupOutput('test')
    backEnd.processCode.defineOutput(output)
    parser.initializeGlobals(output, classAndFns, vars_)

    # ...now for the `i`
    for fileName in fileList:
        outFileName = fileName[:-5] + 'T_.xml'
        globalVars.defineGlobalInputFile(fileName)
        output.defineOutputValues('test', outFileName)
        print('Reading: %s' % fileName)

        # Outputs tokens in XML
        tokenGenerator = lexer(fileName)
        backEnd.processCode.output.startt('tokens')  # opening tag `<tokens>`
        for token in tokenGenerator:
            backEnd.processCode.output.outt(token)   # tokenizing + output
        backEnd.processCode.output.endt('tokens')    # closing tag `</tokens>`

        print('Tokenized output: %s' % outFileName)
        output.closeFile()
def outputParseTree(fileList):
    # Set parse n and set up SymbolTable/modules that depend on it
    backEnd.setGlobals(parseNum=0)
    vars_, classAndFns = backEnd.SymbolTable.initialize()
    backEnd.processCode.initializeHashTables(vars_, classAndFns)
    backEnd.returnSemantics.initialize(classAndFns)

    # Set up the `o` part of I/O
    output = backEnd.SetupOutput('test')
    backEnd.processCode.defineOutput(output)
    parser.initializeGlobals(output, classAndFns, vars_)

    # ...now for the `i`
    for fileName in fileList:
        print('Reading: %s' % fileName)
        outFileName = fileName[:-5] + '_.xml'
        globalVars.defineGlobalInputFile(fileName)
        output.defineOutputValues('test', outFileName)

        # Outputs parse tree in XML
        tokenGenerator = lexer(fileName)
        parser.parseClass(tokenGenerator)

        print('Parse tree output: %s' % outFileName)
        output.closeFile()
def main():
    import commandline_args
    args = commandline_args.parser.parse_args()
    flags = args.__dict__
    if args.debug >= 7:
        print 'Arguments namespace: '
        print repr(args)
    if args.infile is None:
        import console
        console.main(**flags)
    else:
        import lexer
        raw_code = open(args.infile).read()
        lexed = lexer.lexer(raw_code, **flags)
        import dissecter
        solved = dissecter.parser(lexed, **flags)
        import semi_assembly_generator
        semi_assembly = semi_assembly_generator.single_op(solved, **flags)
        if args.outfile == 'stdout':
            outfile = sys.stdout
        else:
            if args.outfile is None:
                args.outfile = args.infile + '.out'
            outfile = open(args.outfile, 'w')
        outfile.write('\n'.join(map(str, semi_assembly)))
def tokenize(program):
    """
    Generates an 'atom' instance of the class associated with the token
    obtained via tokenize_python (tokenize module). See symbol_table.
    """
    from lexer import lexer
    for token in lexer(program):
        if token.id == "number" or token.id == "string":
            Clase_token = symbol_table["Const"]
            atom = Clase_token()
            atom.value = token.value
        else:
            Clase_token = symbol_table.get(token.value)
            if Clase_token:
                atom = Clase_token()
            elif token.id == "Name":
                Clase_token = symbol_table[token.id]
                atom = Clase_token()
                atom.value = token.value
            else:
                raise SyntaxError("Unknown operator (%r)" % token.value)
        yield atom
def test01_basic(self):
    """Test to make sure this works on a simple, one line file, using the
    tokens.txt token file."""
    L = ["begin"]
    file = "test.txt"
    token_file = "tokens.txt"
    create_file(L, file)
    G = lexer.lexer(file, token_file)
    self.assertTrue(next(G) == lexer.Token("RESERVED", "BEGIN", "begin", "begin", 1, 0))
def obtenertokens(src):
    lista = []
    cadena = lexer(src)
    for token in cadena:
        lista.append(token[0])
    lista.append('#')  # end-of-string marker
    return lista
def test_both(self):
    with open('demo.txt') as f:
        read_data = f.read()
    with open('demo2.txt') as f:
        read_data2 = f.read()
    self.assertEqual(lexer(read_data), lex(read_data2))
def test_print(self):
    self.assertEqual(lexer("φ(..)12"),
                     [{'toktype': 0, 'value': None},
                      {'toktype': 3, 'value': 12}])
def __init__(self, fname, scheck=False, d=False):  # d is the debug value
    self.lexer = lexer.lexer(fname, False)
    self.thefilename = fname
    self.token = self.lexer.getToken
    self.nexttoken = self.lexer.getNext
    self.tkgen = self.lexer.tokengenerator
    self.classnames = set(['int', 'char', 'bool', 'void', 'sym'])
    self.symtab = symtable(d)
    self.semcheck = scheck
def _compile(self):
    self.plainTextEdit.clear()
    print(self.editor.text())
    try:
        parser.parse(str(self.editor.text()), lexer())
        self.plainTextEdit.appendPlainText("Programa compilado com sucesso")
        SemanticTools.reset()
    except Exception, e:
        self.plainTextEdit.setPlainText(unicode(e.message))
def test_final(self):
    string = u"""def quadrado : [ lado := 0; input (lado); area := lado * lado; output (area); ]"""
    parser.parse(string, lexer())
    self.assertEqual(''.join(SemanticTools.code), '.assembly extern mscorlib{}\n .assembly teste{}\n .module teste.exe\n .class public teste\n {\n .method public static void principal ()\n {\n .entrypoint .locals init (int32 lado) .locals init (int32 area) ldc.i4 0 stloc ladocall string [mscorlib]System.Console::ReadLine()call int32 [mscorlib]System.Int32::Parse(string)stloc lado ldloc lado\n ldloc lado\n mul\n stloc area ldloc area call void [mscorlib]System.Console::Write(int32)\n ret\n }\n }\n ')
def test03_LexerError(self):
    """Makes sure next(G) throws a LexerError exception if it hits a bad token."""
    L = ["ab%cd"]
    file = "test.txt"
    token_file = "tokens.txt"
    create_file(L, file)
    G = lexer.lexer(file, token_file)
    next(G)
    with self.assertRaises(lexer.LexerError):
        next(G)
def main():
    from lexer import lexer
    from mlparser import parser
    from pprint import pprint
    with open('fizzbuzz.mil') as f:
        src = f.read()
    tokens = lexer(src)
    ast = parser(tokens)
    pprint(ast)
    pprint(translate(ast, ()))
def main():
    a = input()
    filename = a
    file = open(filename, "r")
    Lexer = lexer(file)
    Lexer.tokenizer()
    parser = Parser(Lexer.tokens)
    parser.build_AST()
    evaluator = Evaluator(parser.AST)
    evaluator.run(parser.AST)
def test_variable_assignment(self):
    self.assertEqual(lexer("a (^._.^)ノ 7"),
                     [{'toktype': 1, 'value': 'a'},
                      {'toktype': 6, 'value': None},
                      {'toktype': 3, 'value': 7}])
def main(): content = "" with open("test.lang", "r") as file: content = file.read() #build the lexer and parser lex = lexer.lexer(content) parse = parser.parser(content) tokens = lex.tokenize()
def main():
    from lexer import lexer
    from mlparser import parser
    from translate import translate
    from pprint import pprint
    with open('fizzbuzz.mil') as f:
        src = f.read()
    tokens = lexer(src)
    ast = parser(tokens)
    code = translate(ast, ())
    run(code)
def test02_StopIteration(self):
    """Makes sure next(G) throws a StopIteration exception if done.
    (This should happen automatically if you used yield.)"""
    L = ["begin"]
    file = "test.txt"
    token_file = "tokens.txt"
    create_file(L, file)
    G = lexer.lexer(file, token_file)
    next(G)
    with self.assertRaises(StopIteration):
        next(G)
def compiler(input_path: str):
    print_tokens = True
    with open(input_path, "r") as f:
        tokens = lexer(f.read())
    if print_tokens:
        for token in tokens:
            print(token)
    print(parser(tokens))
def getlexer(specfname, debug=0, forcegen=0):
    """
    Generate a lexer table, construct a lexer for lexing fname and return it.
    Both the lexer and the generated module are returned.
    """
    import lexer
    if specfname[-4:] != ".pyl":
        raise ApiError("bad spec filename %s" % specfname)
    tab = specfname[:-4] + "_lextab.py"
    generate(specfname, tab, debug=debug, forcegen=forcegen)
    l = _import(tab)
    return lexer.lexer(l.lexspec), l
def init(strinput):
    '''Initialize the parser'''
    global lexgen, token0, token1
    lexgen = lexer.lexer(strinput)
    try:
        token0 = token1 = next(lexgen)
    except StopIteration:
        print('Warning: file empty')
        token1 = lexer.Token(lexer.NONE, None, None, None)
    nextToken()
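# Hedged sketch (not from the source): init() above keeps a one-token lookahead in the
# module globals token0/token1 and then calls nextToken(), which is not shown here.
# Assuming token0 is the current token and token1 the lookahead, a matching advance
# function might look like this; lexer.Token and lexer.NONE follow the usage already
# visible in init(), everything else is an assumption.
def nextToken():
    '''Shift the lookahead: token0 <- token1, token1 <- next lexed token (or a sentinel).'''
    global token0, token1
    token0 = token1
    try:
        token1 = next(lexgen)
    except StopIteration:
        token1 = lexer.Token(lexer.NONE, None, None, None)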
def main():
    # read the source code first
    content = ""
    with open('C:/Users/Raza Haider/Desktop/Slither/src/test.lang', 'r') as file:
        content = file.read()
    # lexer
    lex = lexer.lexer(content)
    token = lex.tokenize()
    for tk in token:
        print(tk)
def processMacro(self):
    List = lexer.lexer(self.code)
    for line in List:
        for token in range(len(line)):
            if line[token].type == '_DEFINE':
                row = line[token + 1]
                replace = line[token + 2]
                List.remove(line)
                for newline in List:
                    for newtoken in newline:
                        if newtoken.value == row.value:
                            newtoken.value = replace.value
                            newtoken.type = replace.type
    self.codeTokenList = List
def parsespec(fname, outfname, debug=0):
    l = lexer.lexer(lexspec)
    l.setinput(fname)
    g = srgram.SRGram(gt.gramspec)
    p = glr.GLR(g)
    p.setlexer(l)
    try:
        tree = p.parse()
        if debug >= 10:
            glr.dottree(tree)
        helpers.proctree(tree, gt)
    except ParseError, e:
        raise SpecError("%s:%d: parse error at %r" % (fname, pylly_lextab.lineno, e.str))
def _generate(self):
    self.plainTextEdit.clear()
    try:
        fileInfo = QtCore.QFileInfo(self.filename)
        nameLayer = str(fileInfo.baseName())
        SemanticTools.program_name = nameLayer
        parser.parse(str(self.editor.text()), lexer())
        self.plainTextEdit.appendPlainText(u"Código objeto gerado com sucesso")
        fname = open(self.filename + ".il", 'w')
        fname.write("\n".join(SemanticTools.code))
        fname.close()
        SemanticTools.reset()
    except Exception, e:
        self.plainTextEdit.setPlainText(unicode(e.message))
def compile(grammar):
    grammar = grammar.replace('\r\n', '\n')
    grammar += '\n'
    prods = grammar.split('\n')
    prods_tokens = []
    for prod in prods:
        prod = prod.strip()
        if len(prod) == 0:
            continue
        tokens = lexer.lexer(prod)
        if len(tokens) > 0:
            prods_tokens.append(list(tokens))
    tree = parse.parse(prods_tokens)
    code = codegen.codegen(tree)
    return code
def __init__(self, parent: QtWidgets.QWidget, editor: Qsci.QsciScintilla):
    self.parent = parent
    self.file_list = [
        'main.pyw', 'core.py', 'GUI.py', 'config.py', 'mainWin.py',
        'aboutWin.py', 'img/logo.png', 'img/logo.ico', 'json/config.json'
    ]
    self.cheak_file_integrity()
    self.config_file: str = 'json/config.json'
    self.load_config()
    self.lexer = lexer.lexer(editor)
    self.lexer_json = lexer.lexer_json(editor)
    self.set_lexer()
def main(p, s):
    """Use cursors, a queue, and memoization to match glob pattern to string."""
    # Populate initial variables.
    p_cursor = 0
    s_cursor = 0
    cursor_pair_queue = deque([(p_cursor, s_cursor)])
    cursor_pairs_seen = {}
    # Add all elements of string to dictionary of actions.
    actions = {'char': count_character, 'question_mark': question_mark,
               'star': star, 'set': check_set, 'negset': check_negset}
    # Prune any redundant * in pattern.
    while '**' in p:
        p = p.replace('**', '*')
    # Process pattern with lexer.
    p = L.lexer(p)
    print('lexed p:', p)
    # Start traversing string and adding cursor-pairs to queue.
    while cursor_pair_queue:
        print('cursor_pair_queue:', cursor_pair_queue)
        p_cursor, s_cursor = cursor_pair_queue.popleft()
        # Eliminate cursor-pairs already examined or having invalid s-cursor.
        if (p_cursor, s_cursor) in cursor_pairs_seen or s_cursor == len(s):
            continue
        else:
            cursor_pairs_seen[(p_cursor, s_cursor)] = True
        # Get next character of pattern
        if p_cursor < len(p):
            next_char = p[p_cursor][0]
        else:
            continue
        # Compare character-pairs.
        try:
            new_pairs = actions[next_char](p, s, p_cursor, s_cursor)
        except KeyError:
            print('KeyError: {}'.format(next_char))
            return False
        if new_pairs:
            if s_cursor == len(s) - 1 and p_cursor == len(p) - 1:
                return True
            cursor_pair_queue.extend(new_pairs)
    # If we are here, queue is empty but either p or s is not yet used up.
    return False
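# Hedged sketch (not from the source): main() above dispatches through
# actions[next_char](p, s, p_cursor, s_cursor) and expects each handler to return a list
# of new (p_cursor, s_cursor) pairs to enqueue, or a falsy value on mismatch. The handler
# below is a hypothetical example of that contract for the '?' wildcard, not the project's
# own question_mark implementation.
def question_mark(p, s, p_cursor, s_cursor):
    # '?' matches exactly one character: advance both cursors together.
    if s_cursor < len(s):
        return [(p_cursor + 1, s_cursor + 1)]
    return []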
def test04_LexerError2(self):
    """Makes sure next(G) throws an error message containing the strings
    "line x" and "column y", where x and y are the line and column numbers
    of the first character of the bad token. (Line numbers counted from 1;
    column numbers counted from 0.)"""
    L = ["ab%cd"]
    file = "test.txt"
    token_file = "tokens.txt"
    create_file(L, file)
    G = lexer.lexer(file, token_file)
    next(G)
    try:
        l = next(G)
    except lexer.LexerError as e:
        r1 = re.search("line \d+", str(e))
        r2 = re.search("col(umn)?\s+\d+", str(e))
        self.assertTrue(r1 != None and r2 != None)
    else:
        self.assertTrue(False)  # No exception was thrown.
def interpreter(input_path: str):
    print_tokens = False
    print_nodes = False
    with open(input_path, "r") as f:
        tokens = lexer(f.read())
    if print_tokens:
        for token in tokens:
            print(token)
    nodes = parse(tokens)
    if print_nodes:
        for node in nodes:
            print(node)
    memory, mp = execute(nodes)
def translate(self):
    translator = Translator.translator('fa', 'en')
    key_word = {"اگر": "if", "وگر": "elif", "وگرنه": "else", "واردکن": "import",
                "کلاس": "class", "تعریف": "def", "تازمانیکه": "while", "برای": "for",
                "در": "in", "نه": "not", "و": "and", "یا": "or", "برگردان": "return",
                "چاپ": "print", "ادامه": "continue", "توقف": "break"}
    sybols = [":", "<", "<", "=", "==", "+", "_", "*", "**", "(", ")", "[", "]",
              "{", "}", "\\", "\"", "\'", "\n", "<=", "<>", ">=", "#", ""]
    codeTextPersian = self.srcTextEdit.toPlainText()
    codeWordsPersian = codeTextPersian.replace(" ", " s ").replace("\n", " n ").replace("\t", " t ").split(" ")
    englishCode = ""
    word_dict = {}
    for word in codeWordsPersian:
        if word in key_word.keys():
            word_dict[key_word[word]] = word
            englishCode += key_word[word]
        elif word in sybols:
            word_dict[word] = word
            englishCode += word
        elif word == "s":
            englishCode += " "
        elif word == "n":
            englishCode += "\n"
        elif word == "t":
            englishCode += "\t"
        else:
            newWord = 'ـ'.join(translator.translate(word).split(" "))
            word_dict[newWord] = word
            englishCode += newWord
    print(word_dict)
    lex = lexer.lexer(englishCode)
    lex = lex.split("\n")
    finalCode = ""
    for line in lex:
        l = line.split(" |")
        if l[0] in word_dict.keys():
            if len(l) > 1:
                finalCode += str(word_dict[l[0]]) + " |" + l[1] + "\n"
    self.destTextEdit.setPlainText(finalCode)
    finalFile = open("englishCode.txt", "w")
    finalFile.write(englishCode)
def m_button2OnButtonClick(self, event):
    self.m_textCtrl7.Clear()
    content = self.m_textCtrl6.GetValue()
    i = 0
    token_table = 'Sequence' + '(Token , token type)\n'
    # file_log = open("./dir/log.txt", "wb")
    tokens = lexer.lexer(content, token_exprs)
    if not isinstance(tokens, list):
        self.m_textCtrl7.SetValue('The %d line: Syntax error(0) the\' %s\' token cannot be recognized' % (tokens['line'], tokens['character']))
    else:
        self.m_textCtrl7.AppendText(token_table)
        for token in tokens:  # iterate over the token table
            i = i + 1
            token_table = token_table + '\n' + str(token)
            self.m_textCtrl7.AppendText('[%d]' % (i) + str(token) + '\n')
def parser(source_file, token_file):
    """
    source_file: A program written in the ML language.

    returns True if the code is syntactically correct.
    Throws a ParserError otherwise.
    """
    G = lexer(source_file, token_file)
    try:
        result = PROGRAM(next(G), G)
        try:
            next(G)
            # at this point the source should have no more tokens - if the
            # iterator has more, then it is actually a ParserError
            raise ParserError("Tokens exist after END keyword.")
        except StopIteration:
            return result
    except ParserError as e:
        raise e
    except StopIteration as e:
        raise ParserError("Program ends before END token")
def parser(source_file, token_file):
    """
    :param source_file: A program written in the ML language.
    :param token_file: A file defining the types of tokens in the ML language

    returns True if the code is syntactically correct.
    Throws a ParserError otherwise.
    """
    G = lexer(source_file, token_file)
    try:
        current, t, s = PROGRAM(next(G), G)
        try:
            next(G)
            # at this point the source should have no more tokens - if the
            # iterator has more, then it is actually a ParserError
            raise ParserError("Syntax Error: Tokens exist after END keyword.")
        except StopIteration:
            return t, s
    except ParserError as e:
        raise e
    except StopIteration:
        raise ParserError("Syntax Error: Program ends before END token")
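# Hypothetical usage sketch (file names are placeholders, not from the source): both
# parser() variants above lex a source file against a token definition file and raise
# ParserError on malformed input, so a caller might wrap them like this.
if __name__ == "__main__":
    try:
        result = parser("example.ml", "tokens.txt")
        print("Parse OK:", result)
    except ParserError as err:
        print("Parse failed:", err)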
def compiler(input_path: str):
    print_tokens = False
    print_nodes = False
    with open(input_path, "r") as f:
        tokens = lexer(f.read())
    if print_tokens:
        for token in tokens:
            print(token)
    nodes = parse(tokens)
    if print_nodes:
        for node in nodes:
            print(node)
    compile(
        nodes,
        "/home/sakuk/Documents/brainfuck-compiler/tests",
        # "/home/sakuk/Documents/brainfuck-compiler/tests.c"
    )
def parsespec(fname, outfname, debug=0):
    """
    Parse the spec file, generate parsing tables and write them out.
    Debug levels less than 10 are for showing data about the parsed spec file;
    levels 10 and higher are for showing internal data.
    """
    l = lexer.lexer(pyggy_lextab.lexspec)
    l.setinput(fname)
    g = srgram.SRGram(gt.gramspec)
    p = glr.GLR(g)
    p.setlexer(l)
    try:
        tree = p.parse()
        # print the parse tree of the spec file
        if debug >= 11:
            printcover = debug >= 12
            glr.dottree(tree, printcover)
        helpers.proctree(tree, gt)
    except ParseError, e:
        raise SpecError("%s:%d: parse error at %r" % (fname, pyggy_lextab.lineno, e.str))
def run(self):
    semantic.symtab = self.symtab
    self.compilationunit()
    if self.token().type == 'EOF':
        del self.lexer
        self.getoffsets()
        self.semcheck = True
        self.symtab.isfull = True
        self.lexer = lexer.lexer(self.thefilename, False)
        self.token = self.lexer.getToken
        self.tkgen = self.lexer.tokengenerator
        self.nexttoken = self.lexer.getNext
        semantic.syntaxer = self
        self.compilationunit()
        if DEBUG:
            semantic.Iprint()
        tcode.symtab = self.symtab
        scopes = set('g')
        for sym in self.symtab.table.iteritems():
            scopes.add(sym[1].scope)
        if DEBUG:
            for scope in scopes:
                s = scope.split('.')
                print '-----------------------------------------------------------'
                print self.symtab.idsymfromlexscope(s[-1], '.'.join(s[0:-1]))
                print scope + '\n', self.symtab.symfromscope(scope)
                print '-----------------------------------------------------------'
        #for n in self.classnames:
        #    print self.symtab.idsymfromlexscope(n, 'g')
        #    print n + '\n', self.symtab.symfromscope('g.' + n)
        asm = []
        for code in semantic.Icode:
            for c in tcode.get(code):
                asm.append(c)
        return asm
    for tokens in prods:
        (tree, next) = _parse_prod(tokens, 0)
        if tree == None:
            if next < len(tokens):
                print "Missing token after %d (%s)" % (next, tokens[next])
            else:
                print "Parsing error near token %d" % next
            return None
        children.append(tree)
        pos = next
    return ["PRODUCTIONS", children]

#///////////////////////////////////////////////////////////////////////////////

def parse(prods):
    return _parse_productions(prods)

#///////////////////////////////////////////////////////////////////////////////

if __name__ == "__main__":
    import sys, lexer
    if len(sys.argv) <= 1:
        print "Usage: python parse.py <input_filename>"
        sys.exit(1)
    input = open(sys.argv[1], 'r').read()
    prods = input.split('\n')
    prods_tokens = []
    for p in prods:
        tokens = lexer.lexer(p)
        if len(tokens) > 0:
            prods_tokens.append(list(tokens))
    print parse(prods_tokens)
def setUp(self):
    self.parser = parser()
    self.lexer = lexer()
def parse_file(self):
    from lexer import lexer, token
    from parser import parser
    from utils import function, debug_msg

    filename = self.filename
    entry = global_tbl_entry()
    print "parser: Lexing on file:", self.filename
    body = file(self.filename, 'rt').read()
    print 'parser: rORIGINAL:', repr(body)
    print
    print
    print 'parser: -------------TOKENS:------------------'
    lexer = lexer(body)
    parser = parser()
    func_name = None
    curly_brace = 0
    small_brace = 0
    args = ""
    for token in lexer:
        # first find a function name, store id and lookahead; if brace, move to state
        print "parser: parsing token: ", token.get_value(), " of type: ", token.get_type()
        if parser.get_state() == "Begin":
            print "parser: parser state Begin"
            if token.get_type() == "Id":
                parser.set_state("FuncName")
                func_name = token.get_value()
        elif parser.get_state() == "FuncName":
            type(token.get_value())
            type(token.get_type())
            if token.get_value() == "(":
                parser.set_state("FuncArgs")
            elif token.get_type() == "Id":
                parser.set_state("FuncName")
                func_name = token.get_value()
            else:
                parser.set_state("Begin")
        elif parser.get_state() == "FuncArgs":
            if token.get_value() == ")":
                parser.set_state("FuncBody")
            elif token.get_value() == ",":
                print "parser: Comma"
            elif token.get_type() == "Id":
                args += token.get_value()
            else:
                print "parser: found: ", token.get_value(), " while parser in state Args"
                # reset parser
                parser.set_state("Begin")
        elif parser.get_state() == "FuncBody":
            if token.get_value() == "{":
                # confirmed function, update everything
                parser.set_state("Function")
                aFunction = function()
                aFunction.set_name(func_name)
                aFunction.set_args(args)
                print "parser: ***********Found a function by name : ", func_name, " **************************"
                curly_brace += 1
                # insert function
            elif token.get_type() == "Id":
                parser.set_state("FuncName")
                func_name = token.get_value()
            else:
                parser.set_state("Begin")
        elif parser.get_state() == "Function":
            if token.get_value() == "}":
                curly_brace -= 1
                if curly_brace == 0:
                    print "parser: ********* Finished function: ", func_name, "******************"
                    # function ends, update everything
                    parser.set_state("Begin")
                    # close messages for this func
            elif token.get_value() == "{":
                curly_brace += 1
            elif token.get_type() == "Debug":
                parser.set_state("Debug")
                dbg_msg = debug_msg()
                print "MAcro Name ==================================================", token.get_value()
                dbg_msg.set_macro_name(token.get_value())
            elif token.get_type() == "Entry/Exit":
                parser.set_state("DebugEntry/Exit")
                dbg_msg = debug_msg()
                print "MAcro Name ==================================================", token.get_value()
                dbg_msg.set_macro_name(token.get_value())
        elif parser.get_state() == "Debug":
            if token.get_value() == "(":
                if small_brace == 0:
                    parser.set_state("DbgMsg")
                small_brace += 1
        elif parser.get_state() == "DbgMsg":
            if token.get_type() == "Quotes":
                dbg_msg.set_message(token.get_value())
            elif token.get_value() == ")":
                small_brace -= 1
                if small_brace == 0:
                    print "parser: **** Finished one Debug message***** "
                    insert_in_tbl(entry, dbg_msg, aFunction)
                    parser.set_state("Function")
            else:
                parser.set_state("DbgMsgArgs")
        elif parser.get_state() == "DbgMsgArgs":
            if token.get_value() == ")":
                small_brace -= 1
                if small_brace == 0:
                    print "parser: **** Finished one Debug message***** "
                    insert_in_tbl(entry, dbg_msg, aFunction)
                    parser.set_state("Function")
            if token.get_value() == "(":
                small_brace += 1
            if token.get_type() in ["Id", "Quotes"]:
                dbg_msg.append_args(token.get_value())
                print "parser: ======TESTING: Token value: ", token.get_value()
                print "parser: ======TESTING: Token type: ", token.get_type()

    print "parser: ***********all tables ***********************"
    print
    print "parser: -----------Rest-------------------"
    for val in entry.rest_in_list:
        print "parser: Function: ", val.get_function().get_func_name(), " Message: ", val.get_dbg_msg().get_message(), " Debug Args: ", val.get_dbg_msg().get_args()
    print
    print "parser: ----------cmplt_msg_tbl--------------------"
    for hash_key in entry.cmplt_msg_tbl.keys():
        val = entry.cmplt_msg_tbl[hash_key]
        print "parser: Function: ", val.get_function().get_func_name(), " Message: ", val.get_dbg_msg().get_message(), " Debug Args: ", val.get_dbg_msg().get_args()
    print
    print "parser: ----------partial_msg_tbl--------------------"
    for hash_key in entry.partial_msg_tbl.keys():
        print hash_key
        val = entry.partial_msg_tbl[hash_key]
        print "parser: Function: ", val.get_function().get_func_name(), " Message: ", val.get_dbg_msg().get_message(), " Debug Args: ", val.get_dbg_msg().get_args()
    return entry
def main():
    from lexer import lexer
    with open('fizzbuzz.mil') as f:
        src = f.read()
    tokens = lexer(src)
    pprint(parser(tokens))
    defs.append(FunctionDef(symbol=local_vars.find(fname), body=fbody))
    stat = statement(SymbolTable(symtab[:] + local_vars))
    return Block(gl_sym=symtab, lc_sym=local_vars, defs=defs, body=stat)

@logger
def program():
    '''Axiom'''
    global_symtab = SymbolTable()
    getsym()
    the_program = block(global_symtab)
    expect('period')
    return the_program

if __name__ == '__main__':
    from lexer import lexer, __test_program
    the_lexer = lexer(__test_program)
    res = program()
    print '\n', res, '\n'
    res.navigate(print_stat_list)
    from support import *
    node_list = get_node_list(res)
    for n in node_list:
        print type(n), id(n), '->', type(n.parent), id(n.parent)
    print '\nTotal nodes in IR:', len(node_list), '\n'
    res.navigate(lowering)
    node_list = get_node_list(res)
    print '\n', res, '\n'
if sym[0] == "ID": name = consume("ID", "identifier")[1] elif sym[0] in procKeys: name = consume(procKeys)[0] else: error("identifier or procedure keyword") consume("LPAREN", "open paren") actualparams(currScope) consume("RPAREN", "close paren") # FIRST: 'apply' # FOLLOW: = FOLLOW(term) def application(currScope): # -> "apply" ( fnkw | identifier ) "(" actualparams ")" . consume("APPLY", "apply") if sym[0] == "ID": name = consume("ID", "identifier")[1] elif sym[0] in funcKeys: name = consume(funcKeys)[0] else: error("identifier or function keyword") consume("LPAREN", "open paren") actualparams(currScope) consume("RPAREN", "close paren") ## Start It Up! ## lex = lexer.lexer(sys.stdin) # create token generator nextsym() # initialize sym print;print ast = program() # start parsing at the start symbol print str(ast)
# FOLLOW: "]", "infixop", ";", "[", ")", "," print "| "*depth + "abinding" if sym[0] == "ID": binding(depth+1) elif sym[0] in funcKeys: fkwbinding(depth+1) else: error("identifier or function keyword") def identifier(depth): # -> "identifier" # FIRST: "identifier" # FOLLOW: "=", "prefixop", "(", ")", "infixop", "[", "]", ";", "fnkw", "identifier", "typename", "," print "| "*depth + "identifier" name = consume("ID", "identifier")[1] print "| "*depth + "identifier: \"" + str(name) + "\"" def literal(depth): # -> "literal" # FIRST: "literal" # FOLLOW: "infixop", "[", "_", "]", ";", "," print "| "*depth + "literal" type, value = consume(literals, "literal (int, real, string)") print "| "*depth + "literal of type " + type + " = " + str(value) ## Start It Up! ## lex = lexer.lexer(sys.stdin) nextsym() program(0)
    global_symtab = SymbolTable()  # in ir.py; it returns an object that represents the SymbolTable
    getsym()  # here, row <17>; it populates new_sym and new_value with the next token
    the_program = block(global_symtab)  # populates the symbol table via the block function
    expect('period')
    return the_program

# ENTRY POINT
if __name__ == '__main__':
    from lexer import lexer
    from program import __test_program
    the_lexer = lexer(__test_program)  # call the lexer on the program
    res = program()
    # print res
    # print '\n', res, '\n'
    res.navigate(print_stat_list)
    from support import *
    node_list = get_node_list(res)
    for n in node_list: