def new_step(self):
    """Start evaluating the grammar node held by the top stack frame.

    Composite nodes queue their children on the frame's ``calls`` list and
    return ``Eval``.  ``apply`` nodes are answered from ``self.memoizer``
    when the (rule, input identity, position) key is already cached.
    ``exactly`` / ``token`` nodes are matched directly against the input.

    Returns:
        ``Eval`` when child calls were queued (or the node kind is not
        handled here), otherwise the matched value or a ``MatchError``.
    """
    frame = self.stack[-1]
    node = frame.root
    kind = node.name
    pending = frame.calls

    if kind in ["and", "args", "output", "or"]:
        # Every child of the node becomes a pending call.
        pending.extend(node)
        return Eval

    if kind in ["bound", "negation", "quantified"]:
        # Only the first child is evaluated for these wrappers.
        pending.append(node[0])
        return Eval

    if kind == "apply":
        rule_name = node[NAME]
        if rule_name == "anything":
            return pop(self.input)
        key = (rule_name, id(self.input[0]), self.input[1])
        if key in self.memoizer:
            cached = self.memoizer[key]
            # Restore the input state recorded with the cached output.
            self.input = cached[1][:]
            return cached[0]
        frame.key = key
        pending.append(self.rules[rule_name][BODY])
        return Eval

    if kind in ["exactly", "token"]:
        if kind == "token":
            # Consume leading whitespace; the loop stops on the first
            # non-whitespace read (which may be an end-of-input error).
            blanks = ['\t', '\n', '\r', ' ']
            current = pop(self.input)
            while current in blanks:
                current = pop(self.input)
            if self.input[1] == len(self.input[0]):
                return MatchError("EOF")
            # Un-read the non-whitespace character consumed above.
            self.input[1] -= 1
        expected = node[0]
        for wanted in expected:
            if pop(self.input) != wanted:
                return MatchError("Not exactly %s" % expected)
        return expected

    return Eval
def token(s):
    """Skip whitespace on ``g.input`` and then match the literal ``s``.

    Returns ``s`` when every character matches, otherwise a ``MatchError``
    naming the first expected character that did not match.
    """
    blanks = ['\t', '\n', '\r', ' ']
    while True:
        if g.input.next() not in blanks:
            break
    # Step back over the non-whitespace item that ended the skip loop.
    g.input.position -= 1
    for expected in s:
        actual = g.input.next()
        if actual != expected:
            return MatchError("Not exactly %s" % expected)
    return s
def or_(children):
    """Ordered choice: return the output of the first child that matches.

    Each alternative is tried from the same starting position.  If all of
    them fail, the position is reset and a ``MatchError`` is returned.
    """
    start = g.input.position
    for alternative in children:
        g.input.position = start
        result = alternative()
        if isinstance(result, MatchError):
            continue
        return result
    g.input.position = start
    return MatchError("No OR child matches")
def and_(children):
    """Sequence: run every child in order and combine their outputs.

    If any child returns a ``MatchError`` the input position is reset and a
    ``MatchError`` is returned.  Once a child yields a node named ``"out"``,
    only the children of ``"out"`` nodes are kept.  A non-empty list made
    only of ``str`` items whose first item has length 1 is joined into one
    string; otherwise the collected value is returned as is.
    """
    start = g.input.position
    collected = []
    saw_out = False
    for child in children:
        result = child()
        if isinstance(result, MatchError):
            g.input.position = start
            return MatchError("And match failed")
        is_out = getattr(result, "name", None) == "out"
        if is_out and saw_out:
            collected.extend(to_list(result.children))
        elif is_out:
            # First "out" node: discard everything gathered so far.
            collected = result.children
            saw_out = True
        elif not saw_out:
            collected.extend(to_list(result))

    joinable = (
        collected
        and type(collected) == list
        and all(type(item) == str for item in collected)
        and len(collected[0]) == 1
    )
    if joinable:
        return "".join(collected)
    return collected
def get_active(self, key):
    """Return ``self.active[key]``; raise ``MatchError`` if the key is absent."""
    registry = self.active
    if key in registry:
        return registry[key]
    raise MatchError()
def parse(self, rule_name, input, **kwargs):
    """Match ``input`` against the named rule and require full consumption.

    The last element of ``self.rules[rule_name]`` is handed to
    ``self.match`` together with ``input`` and any extra keyword arguments.

    Returns:
        The match output when it is a ``MatchError`` or when the position
        in ``self.input`` is the last index of the source; otherwise
        ``MatchError("Not all input read")``.
    """
    body = self.rules[rule_name][-1]
    result = self.match(body, input, **kwargs)
    if type(result) == MatchError:
        return result
    if len(self.input[0]) == self.input[1] + 1:
        return result
    return MatchError("Not all input read")
def pop(input):
    """Advance the ``[source, position]`` pair by one and return that item.

    ``input[1]`` is incremented in place before the read.  When the new
    position is out of range, ``MatchError("EOF")`` is returned (not raised)
    and the position stays incremented.
    """
    input[1] += 1
    try:
        item = input[0][input[1]]
    except IndexError:
        return MatchError("EOF")
    return item
def next_step(self):
    """Advance the top stack frame after one of its child calls produced output.

    The frame's node kind (``root.name``) decides how the outputs gathered
    so far are combined.

    Returns:
        ``Eval`` when further child calls still have to be evaluated,
        otherwise the result for this node: a value, ``None`` (successful
        negation) or a ``MatchError``.

    Raises:
        Exception: if the node kind is not one of the known operators.
    """
    frame = self.stack[-1]
    root = frame.root
    name = root.name
    outputs = frame.outputs
    output = outputs[-1] if outputs else None
    is_error = type(output) == MatchError
    finished = len(outputs) == len(frame.calls)

    # Errors abort the node immediately, except for operators that are
    # allowed to recover from a failing child.
    if is_error and name not in ["quantified", "or", "negation"]:
        return output
    elif not (finished or name in ["or", "quantified"]):
        return Eval

    if name in ["and", "args", "output"]:
        # If any child is an "output" node, only those children's outputs
        # are kept.
        if any(child.name == "output" for child in root):
            outputs = [
                child_output
                for child, child_output in zip(root, outputs)
                if child.name == "output"
            ]
        return to_node(outputs, self.join_str)
    elif name == "quantified":
        assert (root[1].name == "quantifier")
        lower, upper = {
            "*": (0, inf),
            "+": (1, inf),
            "?": (0, 1)
        }[root[1][0]]
        if is_error:
            # Roll back to the input state saved before the failed
            # repetition and drop the error from the outputs.
            self.input = frame.input[:]
            outputs.pop()
        # Stop on failure, on reaching the upper bound, or when the last
        # repetition consumed nothing (prevents endless zero-width loops).
        if is_error or len(outputs) == upper or frame.input == self.input:
            if lower > len(outputs):
                return MatchError("Matched %s < %s times" %
                                  (len(outputs), lower))
            else:
                return to_node(outputs, self.join_str)
        else:
            frame.input = self.input[:]
            self.stack[-1].calls.append(root[0])
    elif name == "or":
        if is_error:
            # Rewind before the next alternative is tried.
            self.input = frame.input[:]
            if finished:
                return MatchError("All Or matches failed")
        else:
            return output
    elif name == "apply":
        if root[NAME] == "escaped_char" and not is_error:
            # Each two-character string is a (key, value) pair mapping an
            # escape letter to the character it stands for.
            chars = dict(
                ["''", '""', "t\t", "n\n", "r\r", "b\b", "f\f", "\\\\"])
            return chars[output]
        and_node = getattr(output, "name", None) == "And"
        make_node = "!" in self.rules[root[NAME]][FLAGS] or\
            (and_node and len(output) > 1)
        if make_node:
            output = Node(root[NAME], to_list(output))
        self.memoizer[frame.key] = (output, self.input[:])
        return output
    elif name == "bound":
        # Equality, not ``name in "bound"``: the latter is a substring test
        # that also accepts "", "b", "ound", ... and hides unknown operators.
        return Node(root[1][0], to_list(output))
    elif name == "negation":
        if is_error:
            # Negation consumes nothing.  Restore a copy, like the other
            # branches, so the frame's saved state is never aliased.
            self.input = frame.input[:]
            return None
        else:
            return MatchError("Negation true")
    else:
        raise Exception("Unknown operator %s" % name)
    return Eval
def exactly(char):
    """Read one item from ``g.input`` and require it to equal ``char``.

    Returns the item read when it is a ``MatchError`` or equals ``char``;
    otherwise a new ``MatchError`` naming the expected character.
    """
    found = g.input.next()
    if isinstance(found, MatchError):
        return found
    if char == found:
        return found
    return MatchError("Not exactly %s" % char)
def between(start, end):
    """Read one item from ``g.input`` and require ``start <= item <= end``.

    Returns the item read when it is a ``MatchError`` or lies in the
    inclusive range; otherwise a new ``MatchError`` describing the range.
    """
    found = g.input.next()
    if isinstance(found, MatchError):
        return found
    if start <= found <= end:
        return found
    return MatchError("Not between %s and %s" % (start, end))
def rule_anything():
    """Read one item from ``g.input`` and return it.

    A ``None`` read is turned into ``MatchError("End_of_file")``; any other
    value is returned unchanged.
    """
    found = g.input.next()
    if found is None:
        return MatchError("End_of_file")
    return found
def next(self):
    """Advance ``self.position`` by one and return the item at that index.

    When the new position is out of range for ``self.source``,
    ``MatchError("EOF")`` is returned (not raised); the position stays
    incremented.
    """
    self.position += 1
    try:
        item = self.source[self.position]
    except IndexError:
        item = MatchError("EOF")
    return item
def negation(child):
    """Succeed (with ``None``) only when ``child`` fails; never consume input.

    The position of ``g.input`` is restored after running ``child`` whatever
    the outcome.  A successful child yields ``MatchError("Negation_is_true")``.
    """
    start = g.input.position
    result = child()
    g.input.position = start
    if isinstance(result, MatchError):
        return None
    return MatchError("Negation_is_true")
def out(child=lambda: None):
    """Run ``child`` and wrap its output in a ``Node`` named ``"out"``.

    A ``MatchError`` returned by ``child`` is passed through unwrapped.
    With the default ``child`` the wrapped output is ``None``.
    """
    output = child()
    return output if isinstance(output, MatchError) else Node("out", output)


# NOTE: the tuple parameter ``(_, quantifier)`` is Python 2-only syntax.
def quantified(child, (_, quantifier)):
    """Apply ``child`` repeatedly according to ``quantifier``.

    ``quantifier`` is one of ``"*"``, ``"+"`` or ``"?"`` and selects the
    (lower, upper) repetition bounds.  The second argument is unpacked as a
    pair whose first element is ignored.

    Returns:
        The flat list of collected outputs, or a ``MatchError`` when fewer
        than ``lower`` repetitions matched (the input position is then reset
        to where the whole quantified match started).
    """
    lower, upper = {"*": (0, inf), "+": (1, inf), "?": (0, 1)}[quantifier]
    outputs = []
    count = 0
    start_saved = g.input.position
    # NOTE(review): the loop only ends when ``count`` reaches ``upper`` or
    # ``child`` returns a MatchError; there is no check that a repetition
    # consumed input - confirm children cannot succeed without consuming.
    while count < upper:
        # Position before this repetition, restored if it fails.
        saved = g.input.position
        output = child()
        if isinstance(output, MatchError):
            if count < lower:
                g.input.position = start_saved
                return MatchError("Quantified undermatch %s < %s" % (count, lower))
            else:
                # Enough repetitions matched: undo only the failed attempt.
                g.input.position = saved
                return outputs
        outputs.extend(to_list(output))
        count += 1
    return outputs


def negation(child):
    """Return ``None`` when ``child`` fails and a ``MatchError`` when it matches.

    The input position is restored after running ``child`` in both cases.
    """
    saved = g.input.position
    output = child()
    g.input.position = saved
    return None if isinstance(output, MatchError) else MatchError("Negation_is_true")


# NOTE(review): only the first statement of ``bound`` is visible in this
# excerpt; the remainder of its body is not shown, so its behaviour is not
# documented here.  The tuple parameter is Python 2-only syntax.
def bound(child, (_, name)):
    saved = g.input.position
def match(self, root, new_input=None, new_pos=-1):
    """Recursively match the grammar node ``root`` against the current input.

    Failure is signalled by raising ``MatchError``.  Callers that can
    recover (``or``, ``quantified``, ``negation``) restore ``self.input``
    themselves.

    Args:
        root: grammar node; ``root.name`` selects the operator.
        new_input: when not ``None``, ``self.input`` is reset to
            ``[new_input, new_pos]`` before matching.
        new_pos: position stored with ``new_input`` (presumably ``-1``
            because ``pop`` advances before reading - confirm).

    Returns:
        The match output: a single value, ``None`` (empty sequence or
        successful negation), a joined string, or a ``Node``.

    Raises:
        MatchError: if ``root`` does not match.
        Exception: if ``root.name`` is not a known operator.

    Example::

        g.match(g.rules['grammar'][-1], "x='y'")
    """
    if new_input is not None:
        self.input = [new_input, new_pos]
    old_input = self.input[:]
    name = root.name
    if name in ["and", "args", "body", "output"]:
        outputs = [self.match(child) for child in root]
        # If any child is an "output" node, only those children's outputs
        # are kept.
        if any(child.name == "output" for child in root):
            outputs = [
                child_output
                for child, child_output in zip(root, outputs)
                if child.name == "output"
            ]
    elif name == "quantified":
        assert (root[1].name == "quantifier")
        lower, upper = {
            "*": (0, inf),
            "+": (1, inf),
            "?": (0, 1)
        }[root[1][0]]
        outputs = []
        while len(outputs) < upper:
            last_input = self.input[:]
            try:
                outputs.append(self.match(root[0]))
            except MatchError:
                # Undo the failed repetition and stop repeating.
                self.input = last_input[:]
                break
            # A repetition that consumed nothing would loop forever.
            if last_input == self.input:
                break
        if lower > len(outputs):
            raise MatchError("Matched %s < %s times" %
                             (len(outputs), lower))
    elif name == "or":
        for child in root:
            try:
                return self.match(child)
            except MatchError:
                # Rewind before trying the next alternative.
                self.input = old_input[:]
        raise MatchError("All Or matches failed")
    elif name in ["exactly", "token"]:
        if name == "token":
            while pop(self.input) in ['\t', '\n', '\r', ' ']:
                pass
            # Un-read the non-whitespace item that ended the loop.
            self.input[1] -= 1
        if pop(self.input) == root[0]:
            return root[0]
        else:
            # Report the expected literal rather than the whole node.
            raise MatchError("Not exactly %s" % root[0])
    elif name == "apply":
        if root[NAME] == "anything":
            char = pop(self.input)
            # pop() *returns* a MatchError at end of input.  In this
            # exception-based matcher that would count as a successful
            # match (breaking ``~anything`` and letting ``anything*`` spin
            # forever), so turn it into a raised failure.
            if isinstance(char, MatchError):
                raise char
            return char
        outputs = self.match(self.rules[root[NAME]][BODY])
        if root[NAME] == "escaped_char":
            # Each two-character string is a (key, value) pair mapping an
            # escape letter to the character it stands for.
            chars = dict(
                ["''", '""', "t\t", "n\n", "r\r", "b\b", "f\f", "\\\\"])
            return chars[outputs]
        and_node = getattr(outputs, "name", None) == "And"
        make_node = "!" in self.rules[root[NAME]][FLAGS] or\
            (and_node and len(outputs) > 1)
        if not make_node:
            return outputs
        return Node(root[NAME], to_list(outputs))
    elif name == "bound":
        # Equality, not ``name in "bound"``: the latter is a substring test
        # that also accepts "", "b", "ound", ... and hides unknown operators.
        return Node(root[1][0], to_list(self.match(root[0])))
    elif name == "negation":
        try:
            self.match(root[0])
        except MatchError:
            # Negation consumes nothing: restore the entry state.
            self.input = old_input
            return None
        raise MatchError("Negation true")
    else:
        raise Exception("Unknown operator %s" % name)

    # Sequence-like operators fall through to here: flatten one level and
    # collapse the result.
    outputs = [elem for output in outputs for elem in to_list(output)]
    if len(outputs) == 1:
        return outputs[0]
    elif len(outputs) == 0:
        return None
    else:
        if self.join_str and all(
                type(output) == str for output in outputs):
            return "".join(outputs)
        return Node("And", outputs)