def test_detect_error_line(self):
    i = """
    A;

    /* comment */

    "str ing
    """
    le = Lexer(dedent(i).strip())
    le.read_token()  # A
    le.read_token()  # ;
    le.read_token()  # /* comment */
    with self.assertRaises(AsirSyntaxError) as err:
        le.read_token()
    # The unterminated string sits on line 5 of the dedented input.
    self.assertEqual(
        str(err.exception),
        "Expect: '\"', got: 'None' at line 5")
def test_error_andand_operator(self):
    i = "true & false"
    le = Lexer(i)
    le.read_token()  # true
    with self.assertRaises(AsirSyntaxError):
        le.read_token()
def test_error_closing_string(self):
    # Renamed from test_error_closing_comment: the second definition of the
    # same name below would silently shadow this one, and this case is about
    # an unterminated string, not a comment.
    i = '"str ing '
    le = Lexer(i)
    with self.assertRaises(AsirSyntaxError):
        le.read_token()
def test_read_token(self):
    input_text = """
    A+
    B = -123;
    (1 *--2) /-3++;
    Msg = " msg0 \\" 00"
    flag =! true?1:0!= false
    {eval(@pi)}
    //comment
    #define FLAG 1
    /*
    comment c */
    if(1) return 1;
    else {[a, 0]}
    """
    want = [
        [Token.WORD, "A"],
        [Token.OPERATOR, "+"],
        [Token.WORD, "B"],
        [Token.OPERATOR, "="],
        [Token.OPERATOR, "-"],
        [Token.WORD, "123"],
        [Token.SEMICOLON, ";"],
        [Token.LPAR, "("],
        [Token.WORD, "1"],
        [Token.OPERATOR, "*"],
        [Token.OPERATOR, "--"],
        [Token.WORD, "2"],
        [Token.RPAR, ")"],
        [Token.OPERATOR, "/"],
        [Token.OPERATOR, "-"],
        [Token.WORD, "3"],
        [Token.OPERATOR, "++"],
        [Token.SEMICOLON, ";"],
        [Token.WORD, "Msg"],
        [Token.OPERATOR, "="],
        [Token.STRING, '" msg0 \\" 00"'],
        [Token.WORD, "flag"],
        [Token.OPERATOR, "="],
        [Token.OPERATOR, "!"],
        [Token.WORD, "true"],
        [Token.OPERATOR, "?"],
        [Token.WORD, "1"],
        [Token.OPERATOR, ":"],
        [Token.WORD, "0"],
        [Token.OPERATOR, "!="],
        [Token.WORD, "false"],
        [Token.LBRACE, "{"],
        [Token.WORD, "eval"],
        [Token.LPAR, "("],
        [Token.WORD, "@pi"],
        [Token.RPAR, ")"],
        [Token.RBRACE, "}"],
        [Token.LINECOMMENT, "//comment"],
        [Token.DIRECTIVE, "#define FLAG 1"],
        [Token.BLOCKCOMMENT, "/*\ncomment c */"],
        [Token.WORD, "if"],
        [Token.LPAR, "("],
        [Token.WORD, "1"],
        [Token.RPAR, ")"],
        [Token.WORD, "return"],
        [Token.WORD, "1"],
        [Token.SEMICOLON, ";"],
        [Token.WORD, "else"],
        [Token.LBRACE, "{"],
        [Token.LBRACKET, "["],
        [Token.WORD, "a"],
        [Token.COMMA, ","],
        [Token.WORD, "0"],
        [Token.RBRACKET, "]"],
        [Token.RBRACE, "}"],
    ]
    le = Lexer(dedent(input_text).strip())
    for tt, c in want:
        token = le.read_token()
        self.assertEqual(token.token_type, tt)
        self.assertEqual(token.content, c)
def test_error_closing_comment(self):
    i = "/* comment "
    le = Lexer(i)
    # https://docs.python.org/ja/3/library/unittest.html#unittest.TestCase.assertRaises
    with self.assertRaises(AsirSyntaxError):
        le.read_token()
class Beautifier:
    def __init__(self, input_text):
        self.le = Lexer(input_text)
        self.current_line = ""
        self.output_lines = []
        self.depth = 0

    def append_current_line(self):
        if len(self.current_line) == 0:
            # Ignore empty lines.
            return
        self.output_lines.append(
            ((" " * (self.depth * 4)) + self.current_line).rstrip())
        self.current_line = ""

    def append_content(self, content, trailing=""):
        self.current_line += content + trailing

    def append_after_rstrip(self, content, trailing=""):
        self.current_line = self.current_line.rstrip(" ") + content + trailing

    def append_linecomment(self, comment, prev_token_type):
        # prev_token_type is currently unused.
        self.append_content(comment)
        self.append_current_line()

    def append_blockcomment(self, comment):
        self.append_current_line()  # Flush any pending content, just in case.
        if len(comment.splitlines()) == 1:
            # /* comment */
            self.append_content(comment)
            self.append_current_line()
        else:
            self.append_content("/*")
            self.append_current_line()
            lines = comment[2:-2].splitlines()
            # /* com
            # ment */
            # --> [com, ment]
            for ln in lines:
                if len(ln.strip()) >= 1:
                    self.output_lines.append(ln.rstrip())
            self.append_content("*/")
            self.append_current_line()
            # /*
            # com
            # ment
            # */

    def beautify(self):
        prev = Token("", "")
        semicolon_cnt, inside_for = 0, False
        while not self.le.is_end():
            t = self.le.read_token()
            if t.token_type == Token.LINECOMMENT:
                self.append_linecomment(t.content, prev.token_type)
            elif t.token_type == Token.BLOCKCOMMENT:
                self.append_blockcomment(t.content)
            elif t.token_type == Token.OPERATOR:
                if t.content == "!":
                    self.append_content("!")  # Prefix operator.
                elif t.content in {"++", "--"}:
                    if prev.token_type == Token.OPERATOR:
                        self.append_content(t.content, " ")  # ... * ++
                    else:
                        self.append_after_rstrip(t.content, " ")  # A++ etc.
                elif t.content == "-":
                    if prev.token_type in {
                            "",
                            Token.COMMA,
                            Token.SEMICOLON,
                            Token.LPAR,
                    }:
                        self.append_content("-")  # ... (-
                    elif prev.content in {"=", "==", "<", "<=", ">", ">="}:
                        self.append_content("-")  # ... == -
                    else:
                        self.append_content("-", " ")
                else:
                    self.append_content(t.content, " ")
            elif t.token_type == Token.LPAR:
                if prev.content in {"for", "if"}:
                    self.append_content("(")  # ... for (
                elif prev.token_type == Token.WORD:
                    # Function call.
                    self.append_after_rstrip("(")  # ... func(
                else:
                    self.append_content("(")  # ... + (
            elif t.token_type == Token.RPAR:
                self.append_after_rstrip(")", " ")
            elif t.token_type == Token.LBRACE:
                self.append_content("{")
                self.append_current_line()
                self.depth += 1
            elif t.token_type == Token.RBRACE:
                self.append_current_line()
                self.depth -= 1
                self.append_content("}")
                self.append_current_line()
            elif t.token_type == Token.LBRACKET:
                if prev.token_type == Token.WORD:
                    # Subscript access.
                    self.append_after_rstrip("[")  # ... arr[
                else:
                    self.append_content("[")  # ... = [
            elif t.token_type == Token.RBRACKET:
                self.append_after_rstrip("]", " ")
            elif t.token_type == Token.COMMA:
                self.append_after_rstrip(",", " ")
            elif t.token_type == Token.SEMICOLON:
                if inside_for:
                    semicolon_cnt += 1
                    if semicolon_cnt == 2:
                        inside_for = False
                    self.append_after_rstrip(";", " ")  # for(a; b;
                else:
                    self.append_after_rstrip(";")
                    self.append_current_line()
            elif t.token_type == Token.END:
                self.append_after_rstrip("$")
                self.append_current_line()
            elif t.token_type == Token.STRING:
                self.append_content(t.content)
            elif t.token_type == Token.WORD:
                if t.content == "else":
                    if self.output_lines[-1].lstrip(" ") == "}":
                        self.output_lines.pop()
                        self.append_content("}" + " " + "else", " ")
                        # if (cond) {
                        #
                        # } else
                    else:
                        self.append_content("else", " ")
                        # if (cond) return 1;
                        # else
                else:
                    if prev.content in {"++", "--"}:
                        self.append_after_rstrip(t.content, " ")  # ... ++a
                    else:
                        self.append_content(t.content, " ")
                    if t.content == "for":
                        inside_for = True
                        semicolon_cnt = 0
            elif t.token_type == Token.DIRECTIVE:
                if len(self.current_line) >= 1:
                    self.append_current_line()
                self.output_lines.append(t.content)  # No indentation.
            else:
                raise AsirSyntaxError(  # ?
                    "Unknown token. type: {}, content: '{}'".format(
                        t.token_type, t.content))
            prev = t
        if len(self.current_line) >= 1:
            self.append_current_line()
        return "\n".join(self.output_lines).strip()
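
# A minimal, hand-traced usage sketch of Beautifier. It assumes only the
# Lexer/Token/Beautifier definitions above; the sample Asir source is
# illustrative and not taken from the test suite.
if __name__ == "__main__":
    src = "if(1){X=-1;Y=[a,0];}else{X++;}"
    print(Beautifier(src).beautify())
    # Expected shape, given the rules above (4-space indent per depth level):
    # if (1) {
    #     X = -1;
    #     Y = [a, 0];
    # } else {
    #     X++;
    # }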