Example #1
    def test_operator(self):
        src = "1+2*(3/4)"
        dst = ""
        lexer = pyauparser.Lexer(self.grammar_operator)
        lexer.load_string(src)
        while True:
            token = lexer.read_token()
            if token.symbol == self.grammar_operator.symbol_Error:
                raise Exception()
            elif token.symbol == self.grammar_operator.symbol_EOF:
                break
            # reassemble the input from the lexemes read so far
            dst += token.lexeme
        self.assertEqual(src, dst)
Example #2
    def test_comment(self):
        lexer = pyauparser.Lexer(self.grammar_group)
        lexer.load_string(
            """
                a // Comment
                /* Block /* Comment "*/ b /* " */
                (* Block (* Comment2 *) " *) c
                [* " *] " [* *] *] d
            """)

        tokens = lexer.read_token_all()
        terminals = [t.lexeme for t in tokens if t.symbol.type == pyauparser.SymbolType.TERMINAL]
        self.assertEqual(terminals, ['a', 'b', 'c', 'd'])
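
A small debugging sketch (hypothetical, not part of the original test) that could be appended inside test_comment: print every token the lexer emitted to see which symbols the comment text maps to, using only the token attributes shown in these examples.

        for t in tokens:
            print((t.symbol.name, t.symbol.type, t.lexeme))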
Example #3
import pyauparser


def main():
    # Tokenize an input file with a lexer built from a loaded grammar.
    # This example shows how to drive the lexer manually with read_token().

    g = pyauparser.Grammar.load_file("data/operator.egt")

    lexer = pyauparser.Lexer(g)
    lexer.load_file("data/operator_sample_1.txt")

    while True:
        token = lexer.read_token()
        print((token.symbol.name, token.lexeme, token.position))
        if   token.symbol.type == pyauparser.SymbolType.END_OF_FILE:
            break
        elif token.symbol.type == pyauparser.SymbolType.ERROR:
            print("ERROR({0}:{1}): Unknown Token '{0}'".format(
                token.position[0], token.position[1], token.lexeme))
            return

    print("done", lexer.position)
Example #4
    def test_html(self):
        lexer = pyauparser.Lexer(self.grammar_group)
        lexer.load_string(
            """
                a=none, 
                b=
                <html>
                   <head>
                     <title>Some page</title>
                   </head>
                   </body>
                     This is a tad easier than concatenating a series of strings!
                   </body>
                </html>
            """)

        tokens = lexer.read_token_all()
        terminals = [t.lexeme for t in tokens if t.symbol.type == pyauparser.SymbolType.TERMINAL]
        self.assertEqual(terminals[:-1], ['a', '=', 'none', ',', 'b', '='])
        self.assertEqual(terminals[-1][:6], "<html>")
        self.assertEqual(terminals[-1][-7:], "</html>")
Example #5
import getopt

import pyauparser


def c_lex(cmd_args):
    # lex <data file> with the given .egt grammar; optional -e <encoding>
    opts, args = getopt.getopt(cmd_args, "e:")

    encoding = None
    for o, a in opts:
        if o == "-e":
            encoding = a

    egt_path = args[0]
    data_path = args[1]

    g = pyauparser.Grammar.load_file(egt_path)
    lexer = pyauparser.Lexer(g)
    lexer.load_file(data_path, encoding=encoding)

    while True:
        position, token = lexer.position, lexer.read_token()
        print((token.symbol.name, token.lexeme, position))
        if token.symbol.type == pyauparser.SymbolType.END_OF_FILE:
            break
        elif token.symbol.type == pyauparser.SymbolType.ERROR:
            print("ERROR:", token.lexeme)
            return
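
A hypothetical entry point (not part of the original) showing how c_lex might be invoked as a script, with -e selecting the input encoding:

if __name__ == "__main__":
    import sys
    # e.g. python c_lex.py -e utf-8 data/operator.egt data/operator_sample_1.txt
    c_lex(sys.argv[1:])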