forked from kureuil/12sh
/
ast.py
executable file
·328 lines (286 loc) · 11.5 KB
/
ast.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
#!/usr/bin/env python2
import argparse
from sys import exit
import re
try:
from clang.cindex import CursorKind, Index
except:
print('You need libclang to be installed to run this program')
exit(1)
def get_info(node, source_file, depth=0):
children = [get_info(c, source_file, depth+1)
for c in node.get_children()]
if node.location.file and node.location.file.name != source_file:
return None
return { 'kind' : node.kind,
'spelling' : node.spelling,
'location' : node.location,
'extent.start' : node.extent.start,
'extent.end' : node.extent.end,
'is_definition' : node.is_definition(),
'children' : children,
'depth': depth }
def main():
parser = argparse.ArgumentParser(description='Checks the norme on C source files')
parser.add_argument('files', metavar='FILES', type=str, nargs='+',
help='source files to be processed')
parser.add_argument('-d', '--dump', action='store_true',
help='Dump file AST content')
args = parser.parse_args()
if len(args.files) == 0:
parser.error('invalid number arguments')
for source_file in args.files:
index = Index.create()
tu = index.parse(source_file, None)
if not tu:
parser.error("unable to load input")
infos = get_info(tu.cursor, source_file)
if args.dump:
rprint(infos)
linter = Linter(infos, source_file)
linter.lint()
class Linter():
def __init__(self, node, source_file):
self.head = node
self.filepath = source_file
self.function_def_count = 0
self.used_lines = []
self.lines = []
def error(self, node, message):
print('{}:{}:{}:{}'.format(
node['location'].file,
node['location'].line,
node['location'].column,
message
))
def ranged_error(self, node, message):
print('{}:{}:{}:{}:{}:{}'.format(
node['location'].file,
node['extent.start'].line,
node['extent.start'].column,
node['extent.end'].line,
node['extent.end'].column,
message
))
def lint(self):
self.lint_file()
self.do_lint(self.head, None)
def lint_file(self):
with open(self.filepath) as source:
contents = source.read().split('\n')
self.lines = contents
self.check_header()
for i in xrange(0, len(contents)):
self.check_length_limit(contents[i], i)
self.check_trailing_whitespace(contents[i], i)
def check_header(self):
def error():
print('{}:{}:{}:{}'.format(self.filepath, 1, 1, 'Invalid header'))
if not self.lines[0].startswith('/*'):
error()
return
for i in xrange(1, 8):
if not self.lines[i].startswith('**'):
error()
return
if not self.lines[8].startswith('*/'):
error()
return
def check_length_limit(self, line, line_no):
line_len = 0
if line.startswith('**'):
return
for i in xrange(0, len(line)):
if line[i] == '\t':
line_len += 8 - i % 8
else:
line_len += 1
if len > 80:
for i in xrange(80, line_len):
print('{}:{}:{}:{}'.format(
self.filepath,
line_no,
i,
'A line should not be longer than 80 characters'
))
def check_trailing_whitespace(self, line, line_no):
for i in xrange(len(line), 0, -1):
if line[i - 1] == ' ' or line[i - 1] == '\t':
print('{}:{}:{}:{}'.format(
self.filepath,
line_no,
i,
'Trailing whitespace at the end of line'
))
else:
break
def do_lint(self, node, parent, depth=0):
if depth == 1:
if node['kind'] == CursorKind.VAR_DECL:
if not node['spelling'].startswith('g_'):
self.ranged_error(
node,
'A global variable name must be prefixed by `g_`'
)
if node['kind'] == CursorKind.FUNCTION_DECL:
self.arguments_count = 0
self.function_def_count += 1
if self.function_def_count > 5 and node['location'].file.name.endswith('.c'):
self.error(
node,
'A file mustn\'t contain more than 5 functions'
)
if depth == 2:
if node['kind'] == CursorKind.COMPOUND_STMT:
flen = node['extent.end'].line - node['extent.start'].line
if flen > 26:
for i in xrange(26, flen):
self.error(
node,
'A function mustn\'t be longer than 25 lines'
)
if parent and parent['location'].line == node['location'].line:
self.error(
node,
'A compound statement must be on its own line'
)
if node['kind'] == CursorKind.PARM_DECL:
self.arguments_count += 1
if self.arguments_count > 4:
self.ranged_error(
node,
'A function mustn\'t take more than 4 arguments'
)
if depth > 2:
if parent['kind'] == CursorKind.RETURN_STMT:
if node['kind'] != CursorKind.PAREN_EXPR:
self.ranged_error(
node,
'A return statement must be followed by a parenthesis expr'
)
if node['kind'] == CursorKind.COMPOUND_STMT and (
parent['extent.start'].line == node['location'].line or parent['extent.end'].line == node['location'].line):
self.error(
node,
'A compound statement must be on its own line'
)
if node['kind'] == CursorKind.DO_STMT:
self.error(
node,
'Do statements are not allowed'
)
if node['kind'] == CursorKind.FOR_STMT:
self.error(
node,
'For statements are not allowed'
)
if node['kind'] == CursorKind.GOTO_STMT:
self.error(
node,
'Goto statements are not allowed'
)
if node['kind'] in (CursorKind.WHILE_STMT, CursorKind.IF_STMT):
line = self.lines[node['location'].line - 1]
col = node['location'].column - 1
while line[col] not in (' ', '\t'):
col += 1
whitespaces = 0
while line[col + whitespaces] in (' ', '\t'):
whitespaces += 1
if whitespaces > 1:
for i in xrange(1, whitespaces):
print('{}:{}:{}:{}'.format(
node['location'].file,
node['location'].line,
col + i,
'Keywords should be followed by one whitespace'
))
if node['kind'] == CursorKind.CONDITIONAL_OPERATOR:
if parent['kind'] != CursorKind.PAREN_EXPR:
self.ranged_error(
node,
'A ternary expression must be enclosed in parenthesis'
)
if node['kind'] in (CursorKind.VAR_DECL, CursorKind.FUNCTION_DECL):
if re.search('[A-Z]', node['spelling']):
self.error(
node,
'A name must contain only lowercase letters, numbers and underscores'
)
if node['kind'] == CursorKind.STRUCT_DECL and node['spelling']:
if not node['location'].file.name.endswith('.h'):
self.error(
node,
'Structures must be declared in header files'
)
if not node['spelling'].startswith('s_'):
self.error(
node,
'Structure names must be prefixed by `s_`'
)
if parent and parent['kind'] == CursorKind.TYPEDEF_DECL:
deduced_typename = 't' + node['spelling'][1:]
if parent['spelling'] != deduced_typename:
self.error(
parent,
'Structure typedef must match structure\'s name'
)
if node['kind'] == CursorKind.TYPEDEF_DECL:
if not node['location'].file.name.endswith('.h'):
self.error(
node,
'Typedef must be declared in header files'
)
if not node['spelling'].startswith('t_'):
self.error(
node,
'Typedef names must be prefixed by `t_`'
)
if node['kind'] == CursorKind.UNION_DECL and node['spelling']:
if not node['location'].file.name.endswith('.h'):
self.error(
node,
'Unions must be declared in header files'
)
if not node['spelling'].startswith('u_'):
self.error(
node,
'Union names must be prefixed by `u_`'
)
if node['kind'] == CursorKind.FUNCTION_DECL and node['spelling']:
if not node['location'].file.name.endswith('.h') and node['is_definition'] is False:
self.error(
node,
'Functions must be declared in header files'
)
if not node['location'].file.name.endswith('.c') and node['is_definition']:
sef.error(
node,
'Funtions must be defined in source files'
)
if parent and parent['kind'] == CursorKind.UNARY_OPERATOR:
if node['location'].line != parent['location'].line or node['location'].column != (parent['location'].column + 1):
self.error(
parent,
'Unary operator mustn\'t be followed by whitespaces'
)
for child in node['children']:
if child is not None:
self.do_lint(child, node, depth+1)
def iprint(str, depth):
print('{}{}'.format('\t' * depth, str))
def rprint(node, depth=0):
print('')
iprint('spelling: {}'.format(node['spelling']), depth)
iprint('kind: {}'.format(node['kind']), depth)
iprint('location: {}'.format(node['location']), depth)
iprint('extent.start: {}'.format(node['extent.start']), depth)
iprint('extent.end: {}'.format(node['extent.end']), depth)
iprint('is_definition: {}'.format(node['is_definition']), depth)
if len(node['children']):
iprint('children:', depth)
for child in node['children']:
if child is not None:
rprint(child, depth+1)
if __name__ == '__main__':
main()