/
lexer.py
93 lines (85 loc) · 2.72 KB
/
lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from tokens import Token
from types_ import *
from printf import printf
import string
# Character classes and reserved words consulted by the lexer below.
letters = string.ascii_letters
digits = string.digits  # the ten ASCII digits "0123456789"
keywords = ["f"]
class Lexer():
    """Split source text into a flat list of ``Token`` objects.

    The lexer walks the input one character at a time.  ``current_char``
    holds the character under the cursor and becomes ``None`` once the
    input is exhausted.  Tokens are collected in ``self.tokens``.
    """

    def __init__(self, text):
        """Start scanning *text* and load its first character.

        Args:
            text: Iterable of single characters (normally a ``str``).
        """
        self.text = iter(text)
        self.current_char = ''
        # Created here as well as in generate_tokens() so that the make_*
        # helpers never hit a missing attribute when called directly.
        self.tokens = []
        self.advance()

    def advance(self):
        """Move the cursor one character forward.

        ``current_char`` is set to ``None`` when no input is left.
        """
        self.current_char = next(self.text, None)

    def make_number(self):
        """Consume a numeric literal and append a ``TYPE_NUMBER`` token.

        The literal starts at ``current_char`` and extends over following
        decimal digits and at most one ``.``.  A literal containing a ``.``
        becomes a ``float``; otherwise it becomes an ``int``.

        Returns:
            ``True`` when a token was appended, ``False`` when the literal
            contains more than one decimal point (the error is reported
            through ``printf`` and no token is appended).
        """
        pieces = [self.current_char]
        decimal_count = 0
        self.advance()
        # isdecimal() rather than isnumeric(): the latter also accepts
        # characters such as superscripts and fractions that int()/float()
        # cannot parse.
        while self.current_char is not None and (
            self.current_char == '.' or self.current_char.isdecimal()
        ):
            if self.current_char == '.':
                decimal_count += 1
                if decimal_count > 1:
                    printf("ERROR: too many decimal points in one number")
                    # Abort instead of building text like "1.2.3", which
                    # float() would reject with ValueError.
                    return False
            pieces.append(self.current_char)
            self.advance()
        literal = ''.join(pieces)
        value = float(literal) if decimal_count else int(literal)
        self.tokens.append(Token(TYPE_NUMBER, value))
        return True

    def make_equ(self):
        """Consume ``:`` or ``:=`` and append the matching token.

        ``:=`` yields ``TYPE_EQUAL``; a lone ``:`` yields ``TYPE_COLON``.
        """
        self.advance()  # step past the ':'
        if self.current_char == '=':
            self.advance()
            token = Token(TYPE_EQUAL)
        else:
            # Leave the cursor where it is: the character after a lone ':'
            # belongs to the next token and must not be swallowed.
            token = Token(TYPE_COLON)
        self.tokens.append(token)

    def make_identifier(self):
        """Consume an identifier or keyword and append its token.

        The name starts at ``current_char`` and continues over letters,
        digits and underscores.  Names listed in ``keywords`` produce
        ``TYPE_KEYWORD``; all others produce ``TYPE_IDENTIFIER``.
        """
        allowed = letters + digits + '_'  # built once, not per character
        pieces = [self.current_char]
        self.advance()
        while self.current_char is not None and self.current_char in allowed:
            pieces.append(self.current_char)
            self.advance()
        id_string = ''.join(pieces)
        token_type = TYPE_KEYWORD if id_string in keywords else TYPE_IDENTIFIER
        self.tokens.append(Token(token_type, id_string))

    def _single_char_type(self, char):
        """Return the token type for a one-character operator or parenthesis."""
        return {
            '+': TYPE_PLUS,
            '-': TYPE_MINUS,
            '*': TYPE_MULTIPLY,
            '/': TYPE_DIVIDE,
            '(': TYPE_LPAR,
            ')': TYPE_RPAR,
        }[char]

    def generate_tokens(self):
        """Tokenize the remaining input.

        Returns:
            The list of tokens on success, or the string ``"ERROR"`` when an
            unsupported character or a malformed number is encountered.
        """
        self.tokens = []
        while self.current_char is not None:
            char = self.current_char
            if char in " \t\n":
                self.advance()
            elif char.isdecimal():
                if not self.make_number():
                    return "ERROR"
            elif char in "+-*/()":
                self.advance()
                self.tokens.append(Token(self._single_char_type(char)))
            elif char == ':':
                self.make_equ()
            elif char == '_' or char in letters:
                self.make_identifier()
            else:
                print(f"ERROR: character not allowed: {self.current_char}")
                return "ERROR"
        return self.tokens