예제 #1
0
    def make_tokens(self):
        """Tokenize the input from the current position to its end.

        Returns:
            A ``(tokens, error)`` pair. On success ``tokens`` ends with the
            token produced by ``get_eof_token`` and ``error`` is ``None``.
            On the first unsupported character the pair is
            ``([], IllegalCharError)``.
        """
        collected = []

        while self.char_now is not None:
            current = self.char_now

            # Spaces and tabs carry no meaning: consume and move on.
            if current in " \t":
                self.advance()
                continue

            # No advance() here: make_number() moves past the literal itself.
            if current in DIGITS:
                collected.append(self.make_number())
                continue

            # Operators, e.g. + - * / ^
            if current in get_chars_of("op"):
                op_type = get_chars_of("op")[current]
                collected.append(Token(op_type, pos_start=self.pos))
                self.advance()
                continue

            # Brackets, e.g. ( )
            if current in get_chars_of("brac"):
                bracket_type = get_chars_of("brac")[current]
                collected.append(Token(bracket_type, pos_start=self.pos))
                self.advance()
                continue

            # Anything else is an illegal character. The error spans from the
            # offending character to the position right after it.
            error_start = self.pos.copy()
            self.advance()
            error = IllegalCharError(error_start, self.pos,
                                     f"'{current}' is not implemented.")
            return [], error

        collected.append(get_eof_token(self.pos.copy()))
        return collected, None  # None signals "no error"
예제 #2
0
 def read_directive(self):
     """Read a directive starting at the current position.

     Reading stops at the end of the input, at a newline, or where a
     ``//`` or ``/*`` comment begins. Trailing spaces are removed from
     the token content.
     """
     start = self.i
     while True:
         if not self.ch or self.ch == "\n":
             break
         # A comment on the same line is not part of the directive.
         if self.next_two_char() in {"//", "/*"}:
             break
         self.advance()
     content = self.text[start : self.i].rstrip(" ")
     return Token(Token.DIRECTIVE, content)
예제 #3
0
 def add_token(self,
               type: TokenType,
               literal: object = None):
     """Append a token for the lexeme currently being scanned.

     The lexeme is the slice of ``self.source`` between ``self.start``
     and ``self.current``; the token is tagged with ``self.line``.

     Args:
         type: Token type of the new token.
         literal: Optional literal value attached to the token.
     """
     lexeme = self.source[self.start:self.current]
     new_token = Token(type=type, lexeme=lexeme,
                       literal=literal, line=self.line)
     self.tokens.append(new_token)
예제 #4
0
    def __symbol(self):
        """Try to read a one- or two-character symbol at the current position.

        Returns:
            A ``Token`` for the matched symbol, or ``None`` (consuming
            nothing) when the current character is not in ``SYMBOL_LOOKUP``.
        """
        first = self.current_char
        if first not in SYMBOL_LOOKUP:
            return None

        second = self.next_char
        if second in SYMBOL_LOOKUP:
            # Both characters are symbols on their own; prefer the
            # two-character symbol when the pair itself is known.
            pair = first + second
            if pair in SYMBOL_LOOKUP:
                self.__advance()
                self.__advance()
                return Token(SYMBOL_LOOKUP[pair])

        # Single-character symbol. NOTE(review): presumably last_char is the
        # character just consumed by __advance() - confirm against the class.
        self.__advance()
        return Token(SYMBOL_LOOKUP[self.last_char])
예제 #5
0
    def make_tokens(self):
        """Tokenize the input from the current position to its end.

        Returns:
            A ``(tokens, error)`` pair. On success ``tokens`` ends with a
            ``TT_EOF`` token and ``error`` is ``None``. On the first
            unsupported character the pair is ``([], IllegalCharError)``.
        """
        # Token type of every single-character operator / parenthesis.
        single_char_types = {
            '+': TT_PLUS,
            '-': TT_MINUS,
            '*': TT_MUL,
            '/': TT_DIV,
            '(': TT_LPAREN,
            ')': TT_RPAREN,
        }
        tokens = []

        while self.current_char is not None:
            char = self.current_char
            if char in ' \t':
                # Spaces and tabs are skipped.
                self.advance()
            elif char in DIGITS:
                # No advance() here: make_number() moves past the literal.
                tokens.append(self.make_number())
            elif char in single_char_types:
                tokens.append(
                    Token(single_char_types[char], pos_start=self.pos))
                self.advance()
            else:
                # Illegal character: the error spans from the offending
                # character to the position right after it.
                pos_start = self.pos.copy()
                self.advance()
                return [], IllegalCharError(pos_start, self.pos,
                                            "'" + char + "'")

        tokens.append(Token(TT_EOF, pos_start=self.pos))
        return tokens, None
예제 #6
0
    def make_number(self):
        """Read an integer or floating-point literal at the current position.

        Characters are consumed while they are digits or a decimal point.
        A second decimal point ends the literal and is left unconsumed.

        Returns:
            A ``TT_FLOAT`` token if a decimal point was read, otherwise a
            ``TT_INT`` token. The token carries the numeric value and the
            start and end positions of the literal.
        """
        pieces = []
        dot_count = 0
        pos_start = self.pos.copy()
        number_chars = DIGITS + '.'  # built once instead of per iteration

        while (self.current_char is not None
               and self.current_char in number_chars):
            if self.current_char == '.':
                if dot_count == 1:
                    break  # a number has at most one decimal point
                dot_count += 1
            pieces.append(self.current_char)
            self.advance()

        num_str = ''.join(pieces)
        if dot_count == 0:
            return Token(TT_INT, int(num_str), pos_start, self.pos)
        return Token(TT_FLOAT, float(num_str), pos_start, self.pos)
예제 #7
0
    def scan(self):
        """Scan the whole source into ``self.tokens`` and terminate with EOF.

        Each iteration marks the start of the next lexeme and delegates to
        ``scan_token``. The token list is printed before the EOF token is
        appended, so the printed list does not include it.
        """
        while True:
            if self.is_at_end():
                break
            self.start = self.current
            self.scan_token()
        print(self.tokens)

        # The EOF token has an empty lexeme and no literal value.
        self.tokens.append(
            Token(type=TokenType.EOF, lexeme="", literal=None, line=self.line)
        )
예제 #8
0
 def read_blockcomment(self):
     """Read a ``/* ... */`` block comment starting at the current position.

     Returns:
         A ``Token.BLOCKCOMMENT`` token whose content is the comment text
         including both delimiters.

     Raises:
         AsirSyntaxError: if the input ends before a closing ``*/``.
     """
     begin = self.i
     # Step over the opening "/*" first. Otherwise its '*' could pair with
     # a directly following '/', and "/*/" would be accepted as a complete
     # comment.
     if self.next_two_char() == "/*":
         self.advance()
         self.advance()
     while self.ch and self.next_two_char() != "*/":
         self.advance()
     if self.next_two_char() != "*/":
         raise AsirSyntaxError(
             "Expect: '*/', got: '{}' at line {}".format(
                 self.next_two_char(), self.detect_line_number()
             )
         )
     # Consume the closing "*/".
     self.advance()
     self.advance()
     return Token(Token.BLOCKCOMMENT, self.text[begin : self.i])
예제 #9
0
    def make_number(self):
        """Read an integer or floating-point literal at the current position.

        Characters are consumed while they are digits or a decimal point.
        A second decimal point ends the literal and is left unconsumed.

        Returns:
            A token typed by the ``"FLOAT"`` entry of
            ``get_chars_of("factor")`` if a decimal point was read,
            otherwise by its ``"INT"`` entry. The token carries the numeric
            value and the start and end positions of the literal.
        """
        start_pos = self.pos.copy()
        collected = ""
        seen_dot = False

        while True:
            current = self.char_now
            if current is None or current not in DIGITS + ".":
                break
            if current == ".":
                if seen_dot:
                    break  # a number has at most one decimal point
                seen_dot = True
            collected += current
            self.advance()

        # Choose the token kind and the matching numeric conversion together.
        factor_key, convert = ("FLOAT", float) if seen_dot else ("INT", int)
        return Token(
            get_chars_of("factor")[factor_key], convert(collected), start_pos,
            self.pos)
예제 #10
0
 def read_string(self):
     """Read a double-quoted string literal starting at the current position.

     A backslash makes the character after it part of the string, so an
     escaped quote does not end the literal.

     Returns:
         A ``Token.STRING`` token whose content includes both quotes.

     Raises:
         AsirSyntaxError: if the input ends before the closing quote.
     """
     start = self.i
     self.advance()  # step over the opening quote
     while True:
         current = self.ch
         if not current or current == '"':
             break
         if current == "\\":
             # Skip the backslash; the advance below skips the escaped char.
             self.advance()
         self.advance()
     if self.ch != '"':
         raise AsirSyntaxError(
             "Expect: '\"', got: '{}' at line {}".format(
                 self.ch, self.detect_line_number()
             )
         )
     self.advance()  # step over the closing quote
     literal_text = self.text[start : self.i]
     return Token(Token.STRING, literal_text)
예제 #11
0
    def __word(self):
        """Try to read a reserved word or an identifier.

        A word starts with an alphabetic character and continues with
        alphanumeric characters or underscores. Reserved words are looked
        up case-insensitively in ``RESERVED_LOOKUP``.

        Returns:
            A ``Token`` for a reserved word, a
            ``ValueToken(TOKEN_IDENTIFIER, text)`` for any other word, or
            ``None`` (consuming nothing) when the current character cannot
            start a word, including at end of input.
        """
        first = self.current_char
        # At end of input current_char is None, which has no isalpha().
        if first is None or not first.isalpha():
            return None

        chars = [first]
        self.__advance()
        while self.current_char is not None and (self.current_char.isalnum()
                                                 or self.current_char == '_'):
            chars.append(self.current_char)
            self.__advance()

        result = ''.join(chars)
        key = result.lower()
        if key in RESERVED_LOOKUP:
            return Token(RESERVED_LOOKUP[key])
        # Identifiers keep their original spelling; only the lookup is
        # case-insensitive.
        return ValueToken(TOKEN_IDENTIFIER, result)
예제 #12
0
 def __end_of_file(self):
     """Return a ``TOKEN_EOF`` token at end of input, otherwise ``None``."""
     if self.current_char is not None:
         return None
     return Token(TOKEN_EOF)
예제 #13
0
 def __end_of_line(self):
     """Consume a newline and return a ``TOKEN_EOL`` token.

     Returns ``None`` without consuming anything when the current
     character is not a newline.
     """
     if self.current_char != '\n':
         return None
     self.__advance()
     return Token(TOKEN_EOL)
예제 #14
0
 def beautify(self):
     """Re-format the token stream read from ``self.le`` and return the text.

     Tokens are read one at a time until the lexer reports the end of the
     input. Each token is emitted through the ``append_*`` helpers
     according to its type and to the previous token. Braces change
     ``self.depth``, and the first two semicolons after a ``for`` keyword
     do not end the current line.

     NOTE(review): the helpers are not visible here; ``append_after_rstrip``
     presumably strips trailing whitespace of the current line before
     appending - confirm against its definition.

     Returns:
         The lines in ``self.output_lines`` joined with newlines, with
         leading and trailing whitespace stripped.

     Raises:
         AsirSyntaxError: if the lexer yields a token of an unknown type.
     """
     prev = Token("", "")
     semicolon_cnt, inside_for = 0, False
     while not self.le.is_end():
         t = self.le.read_token()
         if t.token_type == Token.LINECOMMENT:
             self.append_linecomment(t.content, prev.token_type)
         elif t.token_type == Token.BLOCKCOMMENT:
             self.append_blockcomment(t.content)
         elif t.token_type == Token.OPERATOR:
             if t.content == "!":
                 self.append_content("!")  # prefix operator
             elif t.content in {"++", "--"}:
                 if prev.token_type == Token.OPERATOR:
                     self.append_content(t.content, " ")  # ... * ++
                 else:
                     self.append_after_rstrip(t.content, " ")  # e.g. A++
             elif t.content == "-":
                 # A minus after an opening context or a comparison /
                 # assignment is emitted without a trailing space.
                 if prev.token_type in {
                         "",
                         Token.COMMA,
                         Token.SEMICOLON,
                         Token.LPAR,
                 }:
                     self.append_content("-")  # ... (-
                 elif prev.content in {"=", "==", "<", "<=", ">", ">="}:
                     self.append_content("-")  # ... == -
                 else:
                     self.append_content("-", " ")
             else:
                 self.append_content(t.content, " ")
         elif t.token_type == Token.LPAR:
             if prev.content in {"for", "if"}:
                 self.append_content("(")  # ... for (
             elif prev.token_type == Token.WORD:  # function call
                 self.append_after_rstrip("(")  # ... func(
             else:
                 self.append_content("(")  # ... + (
         elif t.token_type == Token.RPAR:
             self.append_after_rstrip(")", " ")
         elif t.token_type == Token.LBRACE:
             self.append_content("{")
             self.append_current_line()
             self.depth += 1
         elif t.token_type == Token.RBRACE:
             self.append_current_line()
             self.depth -= 1
             self.append_content("}")
             self.append_current_line()
         elif t.token_type == Token.LBRACKET:
             if prev.token_type == Token.WORD:  # subscript access
                 self.append_after_rstrip("[")  # ... arr[
             else:
                 self.append_content("[")  # ... = [
         elif t.token_type == Token.RBRACKET:
             self.append_after_rstrip("]", " ")
         elif t.token_type == Token.COMMA:
             self.append_after_rstrip(",", " ")
         elif t.token_type == Token.SEMICOLON:
             if inside_for:
                 # The first two semicolons after "for" stay on the line.
                 semicolon_cnt += 1
                 if semicolon_cnt == 2:
                     inside_for = False
                 self.append_after_rstrip(";", " ")  # for(a; b;
             else:
                 self.append_after_rstrip(";")
                 self.append_current_line()
         elif t.token_type == Token.END:
             self.append_after_rstrip("$")
             self.append_current_line()
         elif t.token_type == Token.STRING:
             self.append_content(t.content)
         elif t.token_type == Token.WORD:
             if t.content == "else":
                 # Guard the look-back: when no line has been emitted yet
                 # there is no "}" line to merge with, and indexing [-1]
                 # would raise IndexError.
                 if (self.output_lines
                         and self.output_lines[-1].lstrip(" ") == "}"):
                     self.output_lines.pop()
                     self.append_content("}" + " " + "else", " ")
                     # if (cond) {
                     #
                     # } else
                 else:
                     self.append_content("else", " ")
                     # if (cond) return 1;
                     # else
             else:
                 if prev.content in {"++", "--"}:
                     self.append_after_rstrip(t.content, " ")  # ... ++a
                 else:
                     self.append_content(t.content, " ")
                     if t.content == "for":
                         inside_for = True
                         semicolon_cnt = 0
         elif t.token_type == Token.DIRECTIVE:
             # Flush any pending line so the directive gets its own line.
             if len(self.current_line) >= 1:
                 self.append_current_line()
             self.output_lines.append(t.content)  # no indentation
         else:
             raise AsirSyntaxError(  # ?
                 "Unknown token. type: {}, content: '{}'".format(
                     t.token_type, t.content))
         prev = t
     # Flush whatever is left on the line being built.
     if len(self.current_line) >= 1:
         self.append_current_line()
     return "\n".join(self.output_lines).strip()
예제 #15
0
 def read_token(self):
     """Read and return the next token from the input.

     Leading whitespace is skipped first. Comments, string literals,
     directives and words are delegated to the matching ``read_*``
     method. Operators and punctuation are built here, and the cursor is
     moved past them before returning.

     Raises:
         AsirSyntaxError: if a ``&`` is not followed by another ``&``.
     """
     self.skip_whitespace()

     # Comments are checked before anything else.
     if self.next_two_char() == "//":
         return self.read_linecomment()
     if self.next_two_char() == "/*":
         return self.read_blockcomment()

     current = self.ch
     if current in self.ops:
         doubled = False  # True when the operator spans two characters
         if current in {"?", ":"}:
             text = current
         elif current == "&":
             if self.next_char() != "&":
                 raise AsirSyntaxError(
                     "Expect: '&', got: '{}' at line {}".format(
                         self.next_char(), self.detect_line_number()
                     )
                 )
             text, doubled = "&&", True
         elif current == "|":
             doubled = self.next_char() == "|"
             text = "||" if doubled else "|"
         elif current in {"+", "-"}:
             # ++ -- += -=
             follower = self.next_char()
             doubled = follower in {current, "="}
             text = current + follower if doubled else current
         else:
             # Any other operator may be followed by "=".
             doubled = self.next_char() == "="
             text = current + "=" if doubled else current
         t = Token(Token.OPERATOR, text)
         if doubled:
             self.advance()  # the final advance() below takes the second char
     elif current == '"':
         return self.read_string()
     elif current == "#":
         return self.read_directive()
     elif current == "{":
         self.depth += 1
         t = Token(Token.LBRACE, "{")
     elif current == "}":
         self.depth -= 1
         self.ensure_positive_depth()
         t = Token(Token.RBRACE, "}")
     else:
         # Punctuation that maps directly to a token type.
         punctuation = {
             "(": Token.LPAR,
             ")": Token.RPAR,
             "[": Token.LBRACKET,
             "]": Token.RBRACKET,
             ",": Token.COMMA,
             ";": Token.SEMICOLON,
             "$": Token.END,
         }
         if current not in punctuation:
             return self.read_word()
         t = Token(punctuation[current], current)

     self.advance()
     return t
예제 #16
0
 def read_word(self):
     """Read a word: a run of characters up to the next terminator.

     A word ends at a character in ``self.delims``, a double quote, a
     space, a newline, a tab, or the end of the input.
     """
     start = self.i
     while True:
         current = self.ch
         if not current:
             break
         if current in (self.delims | {'"', " ", "\n", "\t"}):
             break
         self.advance()
     # Unless the input ended, at least one character must have been read.
     assert self.ch is None or start < self.i
     word = self.text[start : self.i]
     return Token(Token.WORD, word)
예제 #17
0
 def read_linecomment(self):
     """Read a line comment from the current position to the end of the line.

     The terminating newline is not consumed and is not part of the token.
     """
     start = self.i
     while True:
         current = self.ch
         if not current or current == "\n":
             break
         self.advance()
     comment = self.text[start : self.i]
     return Token(Token.LINECOMMENT, comment)