def lexer_test():
    text = open(FILE_EXAMPLE, 'r').read()
    lexer = Lexer(text)
    token = lexer.get_next_token()
    while token.type != SY_EOF:
        print(token)
        token = lexer.get_next_token()

def generate_test():
    text = open(FILE_EXAMPLE, 'r').read()
    lexer = Lexer(text)
    parser = Parser(lexer)
    program = parser.program('example')
    generator = Generator(program)
    print(generator.generate())

def test_detect_error_line(self):
    i = """
    A;
    /* comment */
    "str
    ing
    """
    le = Lexer(dedent(i).strip())
    le.read_token()  # A
    le.read_token()  # ;
    le.read_token()  # /* comment */
    with self.assertRaises(AsirSyntaxError) as err:
        le.read_token()
    self.assertEqual(str(err.exception),
                     "Expect: '\"', got: 'None' at line 5")

def else_stmt(self, parent):
    self.eat_kw(KW_ELSE)
    stmt, block = self.stmt_or_block(parent)
    return ELSE(stmt, block)

def stmt_or_block(self, parent):
    block = None
    stmt = None
    if self.current_token.value == SY_LBRACE:
        block = self.block(parent, parent.package)
    else:
        stmt = self.stmt(parent)
    return stmt, block

def bool_expr(self, block):
    self.eat_sy(SY_LPAREN)
    expr = self.expr(block)
    self.eat_sy(SY_RPAREN)
    return expr


if __name__ == '__main__':
    from _lexer import Lexer

    stmts = open('stmt.ding', 'r').read()
    lexer = Lexer(stmts)
    parser = StmtParser(lexer)
    ret = parser.block(None, '')
    print(stmts)
    print(ret)

name = ''
if self.is_type(self.current_token, block):
    self.eat_type()
    name = self.current_token.value
    self.eat_id()
else:
    self.error('argument syntax error')
while self.current_token.value != SY_RPAREN:
    ret.append((type, name))
    self.eat_sy(SY_COMMA)
    type = self.current_token
    if self.is_type(self.current_token, block):
        self.eat_type()
        name = self.current_token.value
        self.eat_id()
    else:
        self.error('argument syntax error')
ret.append((type, name))
return ret


if __name__ == '__main__':
    from _lexer import Lexer

    text = open('class.ding', 'r').read()
    lexer = Lexer(text)
    parser = ClassParser(lexer)
    ret = parser.clazz(Block(None, '', BLOCK_TYPE_FILE))
    print(text)
    print(ret)

def parser_test():
    text = open(FILE_EXAMPLE, 'r').read()
    lexer = Lexer(text)
    parser = Parser(lexer)
    program = parser.program('example')
    print(repr(program))

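# The drivers above are easiest to run directly. A minimal entry-point
# sketch, assuming this module defines FILE_EXAMPLE and imports Lexer,
# Parser, and Generator as the functions above expect:
if __name__ == '__main__':
    lexer_test()
    parser_test()
    generate_test()
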
def test_error_andand_operator(self):
    i = "true & false"
    le = Lexer(i)
    le.read_token()  # true
    with self.assertRaises(AsirSyntaxError):
        le.read_token()

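# A hypothetical companion check (not in the original suite): the error
# case above suggests that a lone '&' is rejected and only the doubled
# form is legal. This assumes '&&' lexes as a single OPERATOR token.
def test_andand_operator(self):
    le = Lexer("true && false")
    le.read_token()  # true
    token = le.read_token()
    self.assertEqual(token.token_type, Token.OPERATOR)
    self.assertEqual(token.content, "&&")
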
def test_error_closing_string(self):
    i = '"str ing '
    le = Lexer(i)
    with self.assertRaises(AsirSyntaxError):
        le.read_token()

def test_read_token(self):
    input_text = """
    A+ B = -123;
    (1 *--2) /-3++;
    Msg = " msg0 \\" 00"
    flag =! true?1:0!= false
    {eval(@pi)} //comment
    #define FLAG 1
    /*
    comment c */
    if(1) return 1; else {[a, 0]}
    """
    want = [
        [Token.WORD, "A"],
        [Token.OPERATOR, "+"],
        [Token.WORD, "B"],
        [Token.OPERATOR, "="],
        [Token.OPERATOR, "-"],
        [Token.WORD, "123"],
        [Token.SEMICOLON, ";"],
        [Token.LPAR, "("],
        [Token.WORD, "1"],
        [Token.OPERATOR, "*"],
        [Token.OPERATOR, "--"],
        [Token.WORD, "2"],
        [Token.RPAR, ")"],
        [Token.OPERATOR, "/"],
        [Token.OPERATOR, "-"],
        [Token.WORD, "3"],
        [Token.OPERATOR, "++"],
        [Token.SEMICOLON, ";"],
        [Token.WORD, "Msg"],
        [Token.OPERATOR, "="],
        [Token.STRING, '" msg0 \\" 00"'],
        [Token.WORD, "flag"],
        [Token.OPERATOR, "="],
        [Token.OPERATOR, "!"],
        [Token.WORD, "true"],
        [Token.OPERATOR, "?"],
        [Token.WORD, "1"],
        [Token.OPERATOR, ":"],
        [Token.WORD, "0"],
        [Token.OPERATOR, "!="],
        [Token.WORD, "false"],
        [Token.LBRACE, "{"],
        [Token.WORD, "eval"],
        [Token.LPAR, "("],
        [Token.WORD, "@pi"],
        [Token.RPAR, ")"],
        [Token.RBRACE, "}"],
        [Token.LINECOMMENT, "//comment"],
        [Token.DIRECTIVE, "#define FLAG 1"],
        [Token.BLOCKCOMMENT, "/*\ncomment c */"],
        [Token.WORD, "if"],
        [Token.LPAR, "("],
        [Token.WORD, "1"],
        [Token.RPAR, ")"],
        [Token.WORD, "return"],
        [Token.WORD, "1"],
        [Token.SEMICOLON, ";"],
        [Token.WORD, "else"],
        [Token.LBRACE, "{"],
        [Token.LBRACKET, "["],
        [Token.WORD, "a"],
        [Token.COMMA, ","],
        [Token.WORD, "0"],
        [Token.RBRACKET, "]"],
        [Token.RBRACE, "}"],
    ]
    le = Lexer(dedent(input_text).strip())
    for tt, c in want:
        token = le.read_token()
        self.assertEqual(token.token_type, tt)
        self.assertEqual(token.content, c)

def test_error_closing_comment(self):
    i = "/* comment "
    le = Lexer(i)
    # https://docs.python.org/ja/3/library/unittest.html#unittest.TestCase.assertRaises
    with self.assertRaises(AsirSyntaxError):
        le.read_token()

class Beautifier:
    def __init__(self, input_text):
        self.le = Lexer(input_text)
        self.current_line = ""
        self.output_lines = []
        self.depth = 0

    def append_current_line(self):
        if len(self.current_line) == 0:
            # Ignore blank lines
            return
        self.output_lines.append(
            ((" " * (self.depth * 4)) + self.current_line).rstrip())
        self.current_line = ""

    def append_content(self, content, trailing=""):
        self.current_line += content + trailing

    def append_after_rstrip(self, content, trailing=""):
        self.current_line = self.current_line.rstrip(" ") + content + trailing

    def append_linecomment(self, comment, prev_token_type):
        self.append_content(comment)
        self.append_current_line()

    def append_blockcomment(self, comment):
        self.append_current_line()  # just in case
        if len(comment.splitlines()) == 1:
            # /* comment */
            self.append_content(comment)
            self.append_current_line()
        else:
            self.append_content("/*")
            self.append_current_line()
            lines = comment[2:-2].splitlines()
            # /* com
            # ment */
            # --> [com, ment]
            for ln in lines:
                if len(ln.strip()) >= 1:
                    self.output_lines.append(ln.rstrip())
            self.append_content("*/")
            self.append_current_line()
            # /*
            # com
            # ment
            # */

    def beautify(self):
        prev = Token("", "")
        semicolon_cnt, inside_for = 0, False
        while not self.le.is_end():
            t = self.le.read_token()
            if t.token_type == Token.LINECOMMENT:
                self.append_linecomment(t.content, prev.token_type)
            elif t.token_type == Token.BLOCKCOMMENT:
                self.append_blockcomment(t.content)
            elif t.token_type == Token.OPERATOR:
                if t.content == "!":
                    self.append_content("!")  # prefix
                elif t.content in {"++", "--"}:
                    if prev.token_type == Token.OPERATOR:
                        self.append_content(t.content, " ")  # ... * ++
                    else:
                        self.append_after_rstrip(t.content, " ")  # A++ etc.
                elif t.content == "-":
                    if prev.token_type in {
                            "",
                            Token.COMMA,
                            Token.SEMICOLON,
                            Token.LPAR,
                    }:
                        self.append_content("-")  # ... (-
                    elif prev.content in {"=", "==", "<", "<=", ">", ">="}:
                        self.append_content("-")  # ... == -
                    else:
                        self.append_content("-", " ")
                else:
                    self.append_content(t.content, " ")
            elif t.token_type == Token.LPAR:
                if prev.content in {"for", "if"}:
                    self.append_content("(")  # ... for (
                elif prev.token_type == Token.WORD:
                    # function call
                    self.append_after_rstrip("(")  # ... func(
                else:
                    self.append_content("(")  # ... + (
            elif t.token_type == Token.RPAR:
                self.append_after_rstrip(")", " ")
            elif t.token_type == Token.LBRACE:
                self.append_content("{")
                self.append_current_line()
                self.depth += 1
            elif t.token_type == Token.RBRACE:
                self.append_current_line()
                self.depth -= 1
                self.append_content("}")
                self.append_current_line()
            elif t.token_type == Token.LBRACKET:
                if prev.token_type == Token.WORD:
                    # subscript access
                    self.append_after_rstrip("[")  # ... arr[
                else:
                    self.append_content("[")  # ... = [
            elif t.token_type == Token.RBRACKET:
                self.append_after_rstrip("]", " ")
            elif t.token_type == Token.COMMA:
                self.append_after_rstrip(",", " ")
            elif t.token_type == Token.SEMICOLON:
                if inside_for:
                    semicolon_cnt += 1
                    if semicolon_cnt == 2:
                        inside_for = False
                    self.append_after_rstrip(";", " ")  # for(a; b;
                else:
                    self.append_after_rstrip(";")
                    self.append_current_line()
            elif t.token_type == Token.END:
                self.append_after_rstrip("$")
                self.append_current_line()
            elif t.token_type == Token.STRING:
                self.append_content(t.content)
            elif t.token_type == Token.WORD:
                if t.content == "else":
                    if self.output_lines[-1].lstrip(" ") == "}":
                        self.output_lines.pop()
                        self.append_content("}" + " " + "else", " ")
                        # if (cond) {
                        #
                        # } else
                    else:
                        self.append_content("else", " ")
                        # if (cond) return 1;
                        # else
                else:
                    if prev.content in {"++", "--"}:
                        self.append_after_rstrip(t.content, " ")  # ... ++a
                    else:
                        self.append_content(t.content, " ")
                    if t.content == "for":
                        inside_for = True
                        semicolon_cnt = 0
            elif t.token_type == Token.DIRECTIVE:
                if len(self.current_line) >= 1:
                    self.append_current_line()
                self.output_lines.append(t.content)  # no indentation
            else:
                raise AsirSyntaxError(  # ?
                    "Unknown token. type: {}, content: '{}'".format(
                        t.token_type, t.content))
            prev = t
        if len(self.current_line) >= 1:
            self.append_current_line()
        return "\n".join(self.output_lines).strip()
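
# A usage sketch in the same style as the other modules' __main__ blocks;
# the input path 'example.rr' is hypothetical.
if __name__ == '__main__':
    src = open('example.rr', 'r').read()
    print(Beautifier(src).beautify())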