-
Notifications
You must be signed in to change notification settings - Fork 0
/
rdparse.py
261 lines (246 loc) · 7.8 KB
/
rdparse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#
# Parse a rule using a (hopefully) simple recursive descend parser.
# It's given a string (which it lexes through lexr.py) and an object
# with information about the terminals, and returns a root object that
# has a eval() method.
#
# The operators we support are, in precedence (high to low):
# !/NOT and ( ... ), 'implicit OR', AND, EXCEPT
# Operator parsing is left to right, so 'a EXCEPT b EXCEPT c' is
# '(a) EXCEPT ((b) EXCEPT (c))'.
# The operands are matcher invocations, which have three forms:
# MATCHER: ARGUMENT
# MATCHER
# ARGUMENT
# In the last case, a series of default matchers is tried in order; the
# first one that likes the
#
# The TermInfo object is expect to have three variables:
# terminals A dictionary; the keys are the names of matchers
# (with the ':' included for those that take an
# argument) and the value is a function to call
# that will return an appropriate matcher object.
# Usually the function is a class (thus calling it
# naturally creates the objects).
# The functions are called with two arguments, the
# matcher name and the argument (the argument is
# None when the matcher takes no arguments).
# error The error thrown by matcher functions when they
# do not like their arguments.
# defterminals The list of default matcher names to try for
# solo arguments.
#
import lexr
# These values are constants.
EXCEPTNODE = (lexr.T("EXCEPT"),)
ANDNODE = (lexr.T("AND"), lexr.T("&&"))
NOTNODE = (lexr.T("!"), lexr.T("NOT"))
OBRACKET = lexr.T("(")
CBRACKET = lexr.T(")")
# Thrown on exception
class ParseError(Exception):
pass
# Each operator, including implicit or, generates a node that will eval
# itself to decide on true or false. It will also print out its canonical
# form.
class NotNode:
def __init__(self, op):
self.op = op
def __str__(self):
return '!(%s)' % (str(self.op),)
def eval(self, data):
return not self.op.eval(data)
class OrNode:
def __init__(self, lst):
self.ops = lst
def __str__(self):
return "%s" % (" ".join(map(str, self.ops)))
def eval(self, data):
for e in self.ops:
if e.eval(data):
return 1
return 0
class AndNode:
def __init__(self, left, right):
self.left = left
self.right = right
def __str__(self):
return "(%s) AND (%s)" % (str(self.left), str(self.right))
def eval(self, data):
return self.left.eval(data) and self.right.eval(data)
class ExceptNode:
def __init__(self, left, right):
self.left = left
self.right = right
def __str__(self):
return "(%s) EXCEPT (%s)" % (str(self.left), str(self.right))
def eval(self, data):
return self.left.eval(data) and not self.right.eval(data)
# Pretty representation of a token tuple.
def pretty(token):
if token[0] == '':
return 'EOL'
else:
return token[1]
# We use a class for this to capture the lexer list and the information
# about the terminals.
class Parse:
def __init__(self, lexlst, terminfo):
self.lex = lexlst
self.terms = terminfo
# These are assistants for terml.
def parsenot(self):
# Always called with
self.lex.pop(0)
res = self.parseterm()
if not res:
raise ParseError, "expecting term, got %s" % (pretty(self.lex[0]),)
# Our finalization protocol is that any bare term object will
# have .finalize() called on it (if such a routine exists).
# Normally they get finalized in orlist processing, but not
# calls parseterm() directly and so must finalize itself
# when necessary.
if hasattr(res, "finalize"):
try:
res.finalize()
except self.terms.error, e:
raise ParseError, str(e)
return NotNode(res)
def parsebrackets(self):
self.lex.pop(0)
root = self.parseexcept()
if self.lex[0] != CBRACKET:
raise ParseError, "expecting closing ), got %s" % (pretty(self.lex[0]),)
self.lex.pop(0)
return root
# Terminal node:
# terml -> ! terml
# ( except )
# TERMINAL: VALUE
# TERMINAL-OR-VALUE
def parseterm(self):
if self.lex[0] in NOTNODE:
return self.parsenot()
elif self.lex[0] == OBRACKET:
return self.parsebrackets()
elif self.lex[0][0] != "W":
# If it is not a word but a token (or EOF), we don't
# handle it and we punt.
return None
term = self.lex.pop(0)[1]
# Now we need to distinguish TERMINAL: VALUE from the others.
# Terminals with values end with :; everything else doesn't.
if term[-1] == ':':
if self.lex[0][0] != 'W':
raise ParseError, "expected argument for %s, got %s" % (term, pretty(self.lex[0]))
val = self.lex.pop(0)[1]
# We check explicitly for has_key, instead of just
# trying and catching KeyError, so that we can tell
# our KeyErrors from any KeyErrors the function we
# call might generate. *Those* KeyErrors we want
# to cause explosions.
if term not in self.terms.terminals:
raise ParseError, "no handler called "+term
try:
return self.terms.terminals[term](term, val)
except self.terms.error, e:
raise ParseError, "handler does not like %s %s: %s" % (term, val, str(e))
# This is either a bare terminal or a value that needs the
# tender loving attention of the default handlers.
if term in self.terms.terminals:
try:
return self.terms.terminals[term](term, None)
except self.terms.error, e:
raise ParseError, "no-value handler %s refused us: %s" % (term, str(e))
else:
# There's no terminal by that name, so try to generate
# a default value.
if not self.terms.defaultterm:
raise ParseError, "no handler called "+term
try:
return self.terms.defaultterm(term)
except self.terms.error, e:
raise ParseError, "no default for %s: %s" % (term, str(e))
# orl -> terml orl
# Note that we cheat by not recursing.
def parseorlist(self):
# pychecker is WRONG about 'last has no merge attribute'.
# for some reason the thing requires 'no-constCond' too
# in order to SHUT UP.
__pychecker__ = "no-constCond no-objattrs"
lst = []
last = None
while 1:
r = self.parseterm()
if not r:
break
# HACK: See if the elements are mergeable. If they
# are, just discard the new one and go on. The merge
# protocol is that there is a 'merge' function and it
# returns true.
if last is not None and last.merge(r):
continue
elif last:
try:
last.finalize()
except self.terms.error, e:
raise ParseError, str(e)
last = None
# last does not get set unless it has signs of
# the protocol.
if hasattr(r, 'merge'):
last = r
lst.append(r)
if not lst:
raise ParseError, "empty OR list"
# We may need a lingering finalization.
if last:
try:
last.finalize()
except self.terms.error, e:
raise ParseError, str(e)
# Special bonus case to avoid making pointless order-1
# orl nodes.
if len(lst) == 1:
return lst[0]
else:
return OrNode(lst)
# andl -> orl [AND andl]
def parseand(self):
left = self.parseorlist()
if self.lex[0] not in ANDNODE:
return left
if not left:
raise ParseError, "empty left AND clause"
self.lex.pop(0)
if self.lex[0] == lexr.EOF:
raise ParseError, "empty right AND clause"
return AndNode(left, self.parseand())
# exceptl -> andl [EXCEPT exceptl]
def parseexcept(self):
left = self.parseand()
if self.lex[0] not in EXCEPTNODE:
return left
if not left:
raise ParseError, "empty left EXCEPT clause"
self.lex.pop(0)
if self.lex[0] == lexr.EOF:
raise ParseError, "empty right EXCEPT clause"
return ExceptNode(left, self.parseexcept())
# parsing starts at EXCEPT, demands an EOF at the end, and insists
# that we have something to start with.
def parse(self):
if self.lex[0] == lexr.EOF:
raise ParseError, "Nothing to parse"
root = self.parseexcept()
if self.lex[0] != lexr.EOF:
raise ParseError, "expected EOL, got token %s" % (self.lex[0][1],)
return root
# To parse, we generate a parse object to stash data and call it.
def parse(s, terminfo):
try:
lexlst = lexr.tokenize(s)
except lexr.BadInput, e:
raise ParseError, e
P = Parse(lexlst, terminfo)
return P.parse()