def left_loops(node):
    '''
    Yield (an estimate of) every left-recursive loop that starts and
    ends at the given node.

    Whether a loop really is left-recursive cannot be decided statically
    because we do not know which parsers will consume input.  The
    estimate assumes every matcher eventually consumes something (via
    its children), refined slightly by ignoring `Lookahead`.  So a loop
    is taken to be any path returning to the start node that follows
    only first children, except below `Or` nodes, or when the previous
    matcher is a `Lookahead`.

    Each yielded loop is a list beginning and ending with the node.
    '''
    from lepl.matchers.combine import Or
    from lepl.matchers.core import Lookahead
    seen = set([node])  # avoid getting lost in embedded loops
    pending = [[node]]
    while pending:
        path = pending.pop()
        tail = path[-1]
        if not isinstance(tail, Matcher):
            continue
        for successor in tail:
            extended = list(path) + [successor]
            if successor is node:
                yield extended
            elif successor not in seen:
                seen.add(successor)
                pending.append(extended)
            # only the first child can start a left-recursive path,
            # unless the parent is Or or the child is a Lookahead
            if not (is_child(tail, Or, fail=False)
                    or is_child(successor, Lookahead, fail=False)):
                break
def add(self, type_, node):
    '''
    Record a node of the given type in the statistics.

    Updates leaf/loop counters from the `type_` bit-flags, then tracks
    the node itself (deduplicated via `self.__known`), bucketing it by
    its matcher type and counting matchers vs other objects.  Nodes
    that cannot be hashed are counted separately in `self.unhashable`.
    '''
    try:
        node_type = matcher_type(node)
    except MatcherTypeException:
        node_type = type(node)
    if type_ & LEAF:
        self.leaves += 1
    if type_ & NONTREE and is_child(node_type, Matcher, fail=False):
        self.loops += 1
    try:
        if node not in self.__known:
            self.__known.add(node)
            if node_type not in self.types:
                self.types[node_type] = set()
            self.types[node_type].add(node)
            if is_child(node_type, Matcher):
                self.total += 1
            else:
                self.others += 1
        else:
            self.duplicates += 1
    # was a bare `except:`, which silently swallowed *every* error;
    # set/dict membership on an unhashable node raises TypeError, which
    # is the only failure this counter is meant to absorb
    except TypeError:
        self.unhashable += 1
def new_clone(i, j, node, args, kargs):
    '''
    Clone `node`, substituting the type registered in `self.spec` for
    the most specific matching parent, but only when every matcher
    argument is a `NoTrampoline`; otherwise fall back to the node's
    own type.  Standard attributes are copied onto the clone.
    '''
    target, usable = None, False
    # last matching spec entry wins, as in the original lookup order
    for spec_parent in self.spec:
        if is_child(node, spec_parent):
            target = self.spec[spec_parent]
    if target:
        usable = True
        # the substitute type is only safe if no argument still needs
        # the trampoline
        for value in list(args) + [kargs[key] for key in kargs]:
            if isinstance(value, Matcher) and not \
                    isinstance(value, NoTrampoline):
                usable = False
    if not usable:
        target = type(node)
    try:
        duplicate = target(*args, **kargs)
        copy_standard_attributes(node, duplicate)
        return duplicate
    except TypeError as err:
        raise TypeError(fmt('Error cloning {0} with ({1}, {2}): {3}',
                            target, args, kargs, err))
def new_clone(node, args, kargs):
    '''
    Clone `node`, substituting the type registered in `self.spec` for
    the most specific matching parent, but only when every matcher
    argument is a `NoTrampolineTransformableWrapper`; otherwise fall
    back to the node's own type.  Standard attributes are copied onto
    the clone.
    '''
    target, usable = None, False
    # last matching spec entry wins, as in the original lookup order
    for spec_parent in self.spec:
        if is_child(node, spec_parent):
            target = self.spec[spec_parent]
    if target:
        usable = True
        # the substitute type is only safe if no argument still needs
        # the trampoline
        for value in list(args) + [kargs[key] for key in kargs]:
            if isinstance(value, Matcher) and not \
                    isinstance(value, NoTrampolineTransformableWrapper):
                usable = False
    if not usable:
        target = type(node)
    try:
        duplicate = target(*args, **kargs)
        copy_standard_attributes(node, duplicate)
        return duplicate
    except TypeError as err:
        raise TypeError(
            format('Error cloning {0} with ({1}, {2}): {3}',
                   target, args, kargs, err))
def test_and(self):
    '''
    Composing transforms should still parse correctly and leave a
    plain And matcher at the top of the tree.
    '''
    expr = (Any() & Optional(Any())) > append('x')
    expr.config.clear().compose_transforms()
    parse = expr.get_parse()
    value = parse('a')[0]
    assert value == 'ax', value
    assert is_child(parse.matcher, And), type(parse.matcher)
def find_tokens(matcher):
    '''
    Collect and return the set of Tokens reachable from `matcher`.

    Also asserts that children of tokens are not themselves Tokens,
    and raises `LexerError` if Tokens and input-consuming non-Token
    matchers are mixed at the top level.

    Should we also check that a Token occurs somewhere on every path
    to a leaf node?
    '''
    found, seen, outside = set(), set(), set()
    queue = deque([matcher])
    while queue:
        current = queue.popleft()
        if current in seen:
            continue
        if is_child(current, NonToken):
            outside.add(current)
        if isinstance(current, BaseToken):
            found.add(current)
            if current.content:
                assert_not_token(current.content, seen)
        else:
            queue.extend(child for child in current
                         if isinstance(child, Matcher))
        seen.add(current)
    if found and outside:
        raise LexerError(
            format('The grammar contains a mix of Tokens and non-Token '
                   'matchers at the top level. If Tokens are used then '
                   'non-token matchers that consume input must only '
                   'appear "inside" Tokens. The non-Token matchers '
                   'include: {0}.',
                   '; '.join(str(n) for n in outside)))
    return found
def find_tokens(matcher):
    '''
    Return the set of Tokens reachable from `matcher`, via a
    breadth-first walk of the matcher graph.

    Children of tokens are asserted not to be Tokens themselves, and a
    `LexerError` is raised when Tokens and input-consuming non-Token
    matchers coexist at the top level.

    Should we also check that a Token occurs somewhere on every path
    to a leaf node?
    '''
    (token_set, done, plain) = (set(), set(), set())
    pending = deque([matcher])
    while pending:
        node = pending.popleft()
        if node not in done:
            if is_child(node, NonToken):
                plain.add(node)
            if isinstance(node, BaseToken):
                token_set.add(node)
                if node.content:
                    assert_not_token(node.content, done)
            else:
                for sub in node:
                    if isinstance(sub, Matcher):
                        pending.append(sub)
            done.add(node)
    if token_set and plain:
        raise LexerError(
            format(
                'The grammar contains a mix of Tokens and non-Token '
                'matchers at the top level. If Tokens are used then '
                'non-token matchers that consume input must only '
                'appear "inside" Tokens. The non-Token matchers '
                'include: {0}.',
                '; '.join(str(n) for n in plain)))
    return token_set
def assert_children(self, b):
    '''
    Verify that `b` is an Or matcher and that none of its
    alternatives is None (or otherwise falsy).
    '''
    assert is_child(b, Or)
    for alternative in b.matchers:
        assert alternative
def RMemo(matcher):
    '''
    Apply the right-recursion memoizer, unless the matcher opts out
    by being a `NoMemo`.
    '''
    return matcher if is_child(matcher, NoMemo, fail=False) \
        else _RMemo(matcher)
def _fmt_repr(self, indent, value, visited, key=None):
    '''
    Render one attribute for an indented repr: non-matchers use
    `repr`, already-visited matchers are shown as a short reference,
    and new matchers recurse via `_indented_repr`.
    '''
    label = key + '=' if key else ''
    prefix = (' ' * indent) + label
    if not is_child(value, Matcher, fail=False):
        return prefix + repr(value)
    if value in visited:
        # break cycles: reference the matcher instead of expanding it
        return prefix + '[' + value._small_str + ']'
    return value._indented_repr(indent, visited, key)
def LMemo(matcher, curtail=None):
    '''
    Apply the left-recursion memoizer, unless the matcher opts out by
    being a `NoMemo`.  `curtail` (depth, length) decides when to stop
    recursing; by default recursion is curtailed once depth exceeds
    the input length.
    '''
    if is_child(matcher, NoMemo, fail=False):
        return matcher
    if curtail is None:
        def curtail(depth, length):
            return depth > length
    return _LMemo(matcher, curtail)
def __call__(self, graph):
    '''
    For every left-recursive loop through a Delayed node, move the
    alternative that continues the loop to the end of its Or node, so
    the recursive branch is tried last.
    '''
    from lepl.matchers.core import Delayed
    from lepl.matchers.combine import Or
    delayed_nodes = [candidate for candidate in preorder(graph, Matcher)
                     if isinstance(candidate, Delayed)]
    for delayed in delayed_nodes:
        for loop in either_loops(delayed, self.conservative):
            for (i, step) in enumerate(loop):
                if is_child(step, Or, fail=False):
                    # we cannot be at the end of the list here, since
                    # that is a Delayed instance
                    # copy from tuple to list so it can be reordered
                    step.matchers = list(step.matchers)
                    alternatives = step.matchers
                    successor = loop[i+1]
                    # move the looping alternative to the end
                    alternatives.remove(successor)
                    alternatives.append(successor)
    return graph
def __call__(self, graph):
    '''
    Warn, then rewrite every left-recursive loop through a Delayed
    node so that the alternative continuing the loop sits last in its
    Or node (making left-recursion more stable at the cost of result
    ordering).
    '''
    self._warn('Alternatives are being re-ordered to improve stability with left-recursion.\n'
               'This will change the ordering of results.')
    delayed_nodes = [candidate for candidate in preorder(graph, Matcher)
                     if isinstance(candidate, Delayed)]
    for delayed in delayed_nodes:
        for loop in either_loops(delayed, self.conservative):
            for (i, step) in enumerate(loop):
                if is_child(step, Or, fail=False):
                    # we cannot be at the end of the list here, since
                    # that is a Delayed instance
                    # copy from tuple to list so it can be reordered
                    step.matchers = list(step.matchers)
                    alternatives = step.matchers
                    successor = loop[i+1]
                    # move the looping alternative to the end
                    alternatives.remove(successor)
                    alternatives.append(successor)
    return graph
def __call__(self, graph):
    '''
    For every left-recursive loop through a Delayed node, rotate the
    looping alternative to the back of its Or node so it is attempted
    after the non-recursive alternatives.
    '''
    from lepl.matchers.core import Delayed
    from lepl.matchers.combine import Or
    # materialize before mutating so the traversal is unaffected
    targets = [entry for entry in preorder(graph, Matcher)
               if isinstance(entry, Delayed)]
    for entry in targets:
        for loop in either_loops(entry, self.conservative):
            for position in range(len(loop)):
                node = loop[position]
                if not is_child(node, Or, fail=False):
                    continue
                # never the final element: that slot holds the
                # Delayed instance, so position+1 is always valid
                choices = list(node.matchers)  # tuple -> mutable list
                wanted = loop[position + 1]
                # push the looping alternative to the end
                choices.remove(wanted)
                choices.append(wanted)
                node.matchers = choices
    return graph
def non_optional_copy(matcher):
    '''
    Return `(required, optional)`: whether `matcher` can match empty
    input and, when it can, a copy adjusted so that it cannot.

    Both "copy" calls below make me nervous - it's not the way the
    rest of lepl works - but there is no specific criticism, or a good
    alternative.
    '''
    if isinstance(matcher, Transform):
        # optionality is inherited from the wrapped matcher
        inner, was_optional = non_optional_copy(matcher.matcher)
        if not was_optional:
            return matcher, False
        clone = copy(matcher)
        clone.matcher = inner
        return clone, True
    if is_child(matcher, BaseSearch, fail=False):
        # this introspection only works because Repeat sets named
        # (ie kargs) arguments.
        if matcher.start == 0:
            clone = copy(matcher)
            clone.start = 1
            if clone.stop == 1:
                clone = clone.first
            return clone, True
    return matcher, False