def test_operator(self):
    src = "1+2*(3/4)"
    dst = ""
    lexer = pyauparser.Lexer(self.grammar_operator)
    lexer.load_string(src)
    while True:
        token = lexer.read_token()
        if token.symbol == self.grammar_operator.symbol_Error:
            raise Exception()
        elif token.symbol == self.grammar_operator.symbol_EOF:
            break
        dst += token.lexeme
    self.assertEqual(src, dst)
def test_comment(self):
    lexer = pyauparser.Lexer(self.grammar_group)
    lexer.load_string("""
        a // Comment
        /* Block /* Comment "*/ b /* " */
        (* Block (* Comment2 *) " *) c
        [* " *] " [* *] *] d
        """)
    tokens = lexer.read_token_all()
    terminals = [t.lexeme for t in tokens
                 if t.symbol.type == pyauparser.SymbolType.TERMINAL]
    self.assertEqual(terminals, ['a', 'b', 'c', 'd'])
def main():
    # Tokenize an input file with a lexer built from a loaded grammar.
    # This example shows how to drive the lexer manually, token by token.
    g = pyauparser.Grammar.load_file("data/operator.egt")
    lexer = pyauparser.Lexer(g)
    lexer.load_file("data/operator_sample_1.txt")
    while True:
        token = lexer.read_token()
        print((token.symbol.name, token.lexeme, token.position))
        if token.symbol.type == pyauparser.SymbolType.END_OF_FILE:
            break
        elif token.symbol.type == pyauparser.SymbolType.ERROR:
            print("ERROR({0}:{1}): Unknown Token '{2}'".format(
                token.position[0], token.position[1], token.lexeme))
            return
    print("done", lexer.position)
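# For comparison, the tests in this section use read_token_all, which
# collects the whole token stream in one call instead of a manual read
# loop. The sketch below is not part of the original example; main_batch
# is a hypothetical name, it reuses the same grammar and sample file
# paths as main() above, and it assumes the module-level pyauparser
# import that the surrounding code relies on.
def main_batch():
    g = pyauparser.Grammar.load_file("data/operator.egt")
    lexer = pyauparser.Lexer(g)
    lexer.load_file("data/operator_sample_1.txt")
    # read_token_all gathers tokens up to the end of input, so any error
    # token is inspected after the fact rather than inside the loop.
    for token in lexer.read_token_all():
        print((token.symbol.name, token.lexeme, token.position))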
def test_html(self):
    lexer = pyauparser.Lexer(self.grammar_group)
    lexer.load_string("""
        a=none, b=
        <html>
        <head>
        <title>Some page</title>
        </head>
        </body>
        This is a tad easier than concatenating a series of strings!
        </body>
        </html>
        """)
    tokens = lexer.read_token_all()
    terminals = [t.lexeme for t in tokens
                 if t.symbol.type == pyauparser.SymbolType.TERMINAL]
    self.assertEqual(terminals[:-1], ['a', '=', 'none', ',', 'b', '='])
    self.assertEqual(terminals[-1][:6], "<html>")
    self.assertEqual(terminals[-1][-7:], "</html>")
def c_lex(cmd_args):
    opts, args = getopt.getopt(cmd_args, "e:")
    encoding = None
    for o, a in opts:
        if o == "-e":
            encoding = a
    egt_path = args[0]
    data_path = args[1]

    g = pyauparser.Grammar.load_file(egt_path)
    lexer = pyauparser.Lexer(g)
    lexer.load_file(data_path, encoding=encoding)
    while True:
        position, token = lexer.position, lexer.read_token()
        print((token.symbol.name, token.lexeme, position))
        if token.symbol.type == pyauparser.SymbolType.END_OF_FILE:
            break
        elif token.symbol.type == pyauparser.SymbolType.ERROR:
            print("ERROR:", token.lexeme)
            return
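# A hedged usage sketch for c_lex (a hypothetical driver, not from the
# original source): getopt expects the argument vector without the program
# name, so sys.argv[1:] is passed straight through. The sample paths in
# the comment are the ones used by main() above and may differ in your
# checkout.
if __name__ == "__main__":
    import sys
    # e.g. python c_lex.py -e utf-8 data/operator.egt data/operator_sample_1.txt
    c_lex(sys.argv[1:])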