Example #1
    def next_token(self):
        # Emit a token for the current single-character lexeme; anything
        # unrecognized becomes an ILLEGAL token.
        ch = self.ch
        to = None
        if ch == '=':
            to = Token(TokenType.ASSIGN, ch)
        elif ch == '+':
            to = Token(TokenType.PLUS, ch)
        elif ch == '-':
            to = Token(TokenType.MINUS, ch)
        elif ch == '(':
            to = Token(TokenType.LPAREN, ch)
        elif ch == ')':
            to = Token(TokenType.RPAREN, ch)
        elif ch == '{':
            to = Token(TokenType.LBRACE, ch)
        elif ch == '}':
            to = Token(TokenType.RBRACE, ch)
        elif ch == ',':
            to = Token(TokenType.COMMA, ch)
        elif ch == ';':
            to = Token(TokenType.SEMICOLON, ch)
        else:
            to = Token(TokenType.ILLEGAL, ch)

        self.read_char()
        return to
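
The method reads self.ch and advances via self.read_char(); the Token, TokenType, and Lexer scaffolding it relies on is not shown here, so the following is only a minimal sketch (every name and detail below is an assumption) that makes the snippet runnable:

from enum import Enum

class TokenType(Enum):
    # Members inferred from the branches of next_token above.
    ASSIGN = "="
    PLUS = "+"
    MINUS = "-"
    LPAREN = "("
    RPAREN = ")"
    LBRACE = "{"
    RBRACE = "}"
    COMMA = ","
    SEMICOLON = ";"
    ILLEGAL = "ILLEGAL"

class Token:
    def __init__(self, type, literal):
        self.type = type
        self.literal = literal

class Lexer:
    def __init__(self, source):
        self.source = source
        self.pos = 0
        self.ch = ""
        self.read_char()  # prime self.ch with the first character

    def read_char(self):
        # Empty string marks end of input.
        self.ch = self.source[self.pos] if self.pos < len(self.source) else ""
        self.pos += 1
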
Example #2
    def __init__(self, name, func, *args):
        Token.__init__(self)
        self.value = name
        self.func = func
        self.args = args
        # Number of declared positional parameters of the wrapped function.
        self.arg_len = self.func.__code__.co_argcount
        # Heuristic: extra names in co_varnames suggest a *args parameter.
        self.has_unpack_args = len(
            self.func.__code__.co_varnames) > self.arg_len
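
co_argcount counts only the declared positional parameters, while co_varnames also lists *args names and local variables, so the comparison above is a heuristic for detecting a *args parameter (it can misfire when the function body defines locals). A self-contained illustration, with the stricter flag-based check for comparison:

import inspect

def add(a, b, *rest):
    return a + b + sum(rest)

print(add.__code__.co_argcount)   # 2 -> a and b
print(add.__code__.co_varnames)   # ('a', 'b', 'rest')
print(len(add.__code__.co_varnames) > add.__code__.co_argcount)  # True

# A more robust test for *args checks the CO_VARARGS flag directly.
print(bool(add.__code__.co_flags & inspect.CO_VARARGS))  # True
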
Example #3
    def test_simple_tokens(self):
        source = "=+-,(){};"

        tests = [
            Token(TokenType.ASSIGN, "="),
            Token(TokenType.PLUS, '+'),
            Token(TokenType.MINUS, '-'),
            Token(TokenType.COMMA, ','),
            Token(TokenType.LPAREN, '('),
            Token(TokenType.RPAREN, ')'),
            Token(TokenType.LBRACE, '{'),
            Token(TokenType.RBRACE, '}'),
            Token(TokenType.SEMICOLON, ';'),
        ]

        lexer_obj = Lexer(source)
        for t in tests:
            token = lexer_obj.next_token()
            self.assertEqual(token.type, t.type)
            self.assertEqual(token.literal, t.literal)
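
The test assumes the unittest framework plus the Token, TokenType, and Lexer definitions exercised in Example #1; a minimal harness around it (assumed, not shown in the source) would be:

import unittest

class LexerTest(unittest.TestCase):
    def test_simple_tokens(self):
        ...  # body as above

if __name__ == "__main__":
    unittest.main()
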
Example #4
    def __init__(self, name, args, expr, env):
        Token.__init__(self)
        self.value = name
        # Keep only the underlying values of the argument tokens.
        self.args = [arg.value for arg in args]
        self.expr = expr
        self.env = env
Example #5
    def tokenize(self, code):
        # Run all acceptors (finite automata) in parallel over the input and
        # keep the longest match; `re` must be imported at module level.
        tokens = []
        last_token = None
        curr_token_pos_start = 0
        acceptors = PythonLexer.get_all_acceptors()
        pos = 0
        while pos < len(code):
            while acceptors:
                next_acceptors = []
                for acceptor in acceptors:
                    if acceptor.move_next(code[pos]):
                        if acceptor.isAccepted():
                            token_type = acceptor.current_state
                            if token_type not in FA.STATE_WITH_LOOK_AHEAD:
                                token_value = code[curr_token_pos_start:pos +
                                                   1]
                            else:
                                token_value = code[curr_token_pos_start:pos]
                            last_token = Token(token_type, token_value)
                        else:
                            next_acceptors.append(acceptor)
                acceptors = next_acceptors
                if acceptors:
                    pos += 1
                    if pos == len(code):
                        if not acceptors[0].isAccepted():
                            last_token = None
                        break
            if last_token:
                if last_token.type not in FA.STATE_WITH_LOOK_AHEAD:
                    pos += 1
                tokens.append(last_token)
                last_token = None
                acceptors = PythonLexer.get_all_acceptors()
                curr_token_pos_start = pos
            elif pos < len(code) and (code[pos] == ' '
                                      or code[pos - 1] == '\n'):
                pos += 1
                last_token = None
                acceptors = PythonLexer.get_all_acceptors()
                curr_token_pos_start = pos
            else:
                if re.fullmatch('^[\'\"].*\n$',
                                code[curr_token_pos_start:pos + 1],
                                flags=re.MULTILINE):
                    raise ValueError('''Tokens: {0}
                    \nUnclosed string start from {1} \n Value: {2}'''.format(
                        tokens, curr_token_pos_start,
                        code[curr_token_pos_start:pos]))
                elif re.fullmatch('^[(\'\'\')(\"\"\")](.|\n)*$',
                                  code[curr_token_pos_start:pos + 1],
                                  flags=re.MULTILINE):
                    raise ValueError('''Tokens: {0}
                    \nUnclosed multiline string start from {1} \n Value: {2}'''
                                     .format(tokens, curr_token_pos_start,
                                             code[curr_token_pos_start:pos]))
                else:
                    raise ValueError('''Tokens: {0}
                    \nUnexpected ID start from {1} \n Value: {2}'''.format(
                        tokens, curr_token_pos_start,
                        code[curr_token_pos_start:pos]))

        return tokens
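
tokenize advances every surviving acceptor in parallel, one character at a time, and remembers the most recent accepting automaton, which gives maximal-munch matching. The acceptor interface implied by the calls above (a sketch under assumptions; the source's FA and get_all_acceptors are not shown) is roughly:

class Acceptor:
    # One finite automaton recognizing a single token class (assumed interface).

    def __init__(self, transitions, accepting_states, start):
        self.transitions = transitions            # {(state, char): next_state}
        self.accepting_states = accepting_states  # states naming a token type
        self.current_state = start

    def move_next(self, ch):
        # True if the automaton survives on ch, False if it dies.
        next_state = self.transitions.get((self.current_state, ch))
        if next_state is None:
            return False
        self.current_state = next_state
        return True

    def isAccepted(self):
        return self.current_state in self.accepting_states
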
Example #6
    def create_operator(self, value, str_position):
        return Token(Tokens.OPERATOR, value, str_position)
Example #7
    def create_number(self, value, str_position):
        return Token(Tokens.NUMBER, value, str_position)
Example #8
    def create_right_parenthesis(self, str_position):
        return Token(Tokens.RIGHT_PARENTHESES, ")", str_position)
Example #9
    def create_left_parenthesis(self, str_position):
        return Token(Tokens.LEFT_PARENTHESES, "(", str_position)
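
All four factories funnel into the same Token constructor with a source position attached. A hypothetical scanning loop dispatching on the current character (the driver and OPERATORS are assumptions, and multi-digit numbers would need a small read-ahead loop):

OPERATORS = "+-*/"

def scan(self, expression):
    tokens = []
    for position, ch in enumerate(expression):
        if ch in OPERATORS:
            tokens.append(self.create_operator(ch, position))
        elif ch.isdigit():
            tokens.append(self.create_number(ch, position))
        elif ch == "(":
            tokens.append(self.create_left_parenthesis(position))
        elif ch == ")":
            tokens.append(self.create_right_parenthesis(position))
    return tokens
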
Example #10
    def __init__(self, value):
        Token.__init__(self, value)
Example #11
 def generate_tokens(self):
   # Lazily yields tokens one at a time; `digits` is assumed to be string.digits.
   while self.char is not None:
     if self.char in " \t\n":  # skip whitespace
       self.advance()
     elif self.char in digits:
       yield Token(TokenTypes.float_, self.generate_num())
     elif self.char == "+":
       self.advance()
       yield Token(TokenTypes.plus)
     elif self.char == "-":
       self.advance()
       yield Token(TokenTypes.minus)
     elif self.char == "*":
       self.advance()
       yield Token(TokenTypes.mul)
     elif self.char == "/":
       self.advance()
       yield Token(TokenTypes.div)
     elif self.char == "%":
       self.advance()
       yield Token(TokenTypes.mod)
     elif self.char == ",":
       self.advance()
       yield Token(TokenTypes.comma)
     elif self.char in "'\"":
       yield Token(TokenTypes.string, self.generate_string(self.char))
     elif self.char == "(":
       yield Token(TokenTypes.lparen)
       self.advance()
     elif self.char == ")":
       yield Token(TokenTypes.rparen)
       self.advance()
     else:
       yield Token(TokenTypes.word, self.generate_word())
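
Because generate_tokens is a generator, tokens are produced lazily as the caller iterates. A hypothetical driver (the constructor signature is an assumption):

lexer = Lexer("3 + 4 * 2")
for token in lexer.generate_tokens():
    print(token)
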
Example #12
    def __init__(self, value):
        # Prefer an int token value; fall back to float for inputs like "2.5".
        try:
            value = int(value)
        except ValueError:
            value = float(value)
        Token.__init__(self, value)
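
The try/except implements "parse as int if possible, otherwise as float". The same coercion in isolation:

def coerce_number(text):
    # Prefer int; fall back to float (still raises ValueError for non-numbers).
    try:
        return int(text)
    except ValueError:
        return float(text)

assert coerce_number("42") == 42
assert coerce_number("2.5") == 2.5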