/
lexer.py
133 lines (122 loc) · 4.35 KB
/
lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from tabletext import to_text
import table
def lexer(file_name):
    """Split the text of ``file_name`` into lexemes and look up their codes.

    Recognised lexemes:

    * words: an uppercase ASCII letter followed by uppercase ASCII letters
      or digits. A word found in ``table.key_dic`` is a keyword; any other
      word is an identifier and is registered in ``table.idn_dic`` (new
      identifiers receive consecutive codes starting at 1001);
    * numbers: a run of ASCII digits, registered in ``table.dig_dic`` (new
      numbers receive consecutive codes starting at 501);
    * single-character separators: the keys of ``table.s_sep_dic``;
    * comments: ``(* ... *)``, which are skipped.

    Whitespace (character codes 8, 9, 10, 13 and 32) separates lexemes and is
    otherwise ignored. Any other character is reported on stdout as a lexical
    error and skipped; an unterminated comment is reported and ends the scan.

    Side effects: mutates ``table.idn_dic`` and ``table.dig_dic`` and, when at
    least one lexeme was found, prints the lexeme table produced by
    ``to_text``.

    Args:
        file_name: path of the source file to tokenize.

    Returns:
        A two-element list ``[lex_list, lex_list_out]``. ``lex_list`` holds
        one ``[lexeme, code, row, column]`` entry per lexeme (row and column
        are 1-based and point at the lexeme's first character);
        ``lex_list_out`` holds just the codes, in the same order.
    """
    whitespace = frozenset("\b\t\n\r ")  # character codes 8, 9, 10, 13, 32
    # Both ends are inclusive: 'Z' and '9' are valid lexeme characters.
    letters = frozenset("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    digits = frozenset("0123456789")

    lex_list = []
    lex_list_out = []
    next_idn_code = 1001
    next_digit_code = 501

    def emit(lexeme, code, token_row, token_col):
        # Keep the detailed table and the bare code stream in lock-step.
        lex_list.append([lexeme, code, token_row, token_col])
        lex_list_out.append(code)

    # Read everything up front: the handle is released even if scanning fails,
    # and end-of-input becomes a simple bounds check instead of an empty read.
    with open(file_name) as source:
        text = source.read()

    size = len(text)
    pos = 0
    row = 1
    col = 1
    while pos < size:
        ch = text[pos]

        if ch in whitespace:
            pos += 1
            if ch == "\n":
                row += 1
                col = 1
            else:
                col += 1

        elif ch in letters:
            start = pos
            pos += 1
            while pos < size and (text[pos] in letters or text[pos] in digits):
                pos += 1
            word = text[start:pos]
            if word in table.key_dic:
                code = table.key_dic[word]
            else:
                if word not in table.idn_dic:
                    table.idn_dic[word] = next_idn_code
                    next_idn_code += 1
                code = table.idn_dic[word]
            emit(word, code, row, col)
            col += pos - start

        elif ch in digits:
            start = pos
            pos += 1
            # The bounds check matters: input may end in the middle of a number.
            while pos < size and text[pos] in digits:
                pos += 1
            number = text[start:pos]
            if number not in table.dig_dic:
                table.dig_dic[number] = next_digit_code
                next_digit_code += 1
            emit(number, table.dig_dic[number], row, col)
            col += pos - start

        elif text.startswith("(*", pos):
            # Search starts after the opener so that "(*)" is not taken for a
            # complete comment.
            closing = text.find("*)", pos + 2)
            if closing == -1:
                print("Lexical error: unclosed comment")
                pos = size  # nothing after an unterminated comment is scanned
            else:
                comment = text[pos:closing + 2]
                line_breaks = comment.count("\n")
                if line_breaks:
                    row += line_breaks
                    # Column of the character right after the comment on its
                    # last line.
                    col = len(comment) - comment.rfind("\n")
                else:
                    col += len(comment)
                pos = closing + 2

        elif ch in table.s_sep_dic:
            # A "(" that does not open a comment also lands here.
            emit(ch, table.s_sep_dic[ch], row, col)
            pos += 1
            col += 1

        else:
            print("Lexical error at line " + str(row) + ", position " + str(col) + ': unknown symbol \"' + ch + '\"')
            pos += 1
            col += 1

    if lex_list:
        print(to_text(lex_list))
    return [lex_list, lex_list_out]