Example #1
 def generator(stream_before):
     '''
     This creates the sequence of tokens returned by the stream.
     '''
     try:
         while stream_before:
             try:
                 (terminals, size, stream_after) = \
                         tokens.size_match(stream_before)
                 if stream_after == stream_before:
                     raise RuntimeLexerError('Tokens matched an empty '
                         'string.\nChange your token definitions so that '
                         'they cannot be empty.')
                 log.debug(format('Token: {0!r} {1!r} {2!r}', 
                                  terminals, size, stream_before))
                 # stream_before here to give correct location
                 yield (terminals, size, stream_before)
                 stream_before = stream_after
             except TypeError:
                 (terminals, size, stream_before) = \
                         discard.size_match(stream_before)
                 log.debug(format('Space: {0!r} {1!r}', terminals, size))
     except TypeError:
         raise RuntimeLexerError(
             format('No lexer for \'{0}\' at line {1} character {2} of {3}.',
                    stream_before.text, stream_before.line_number,
                    stream_before.line_offset, stream_before.source))
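A note on the format(...) helper used throughout these examples: it is lepl's
own function (it appears to come from lepl.support.lib), not the built-in, and
it simply delegates to str.format. A minimal stand-in, assuming only that
delegation, lets the templates be tried outside lepl:

 # Minimal stand-in for lepl's format() helper; the assumption is that it
 # just forwards its template and arguments to str.format.
 def format(template, *args, **kargs):
     return template.format(*args, **kargs)

 print(format('Token: {0!r} {1!r}', ['ID'], 3))  # -> Token: ['ID'] 3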
Example #2
 def to_regexps(cls, use, possibles, have_add=False):
     '''
     Convert to regular expressions.
     
     `have_add` indicates whether the caller can supply an "add":
     None - the caller doesn't care what the lower code needs;
     True - the caller has an add, so the lower code should require one;
     False - the caller doesn't have an add, so the lower code should not
     require one.
     '''
     regexps = []
     for possible in possibles:
         if isinstance(possible, RegexpContainer):
             cls.log.debug(format('unpacking: {0!s}', possible))
             if have_add is None or possible.add_reqd == have_add:
                 regexps.append(possible.regexp)
                 # this flag indicates that it's "worth" using the regexp
                 # so we "inherit"
                 use = use or possible.use
             else:
                 raise Unsuitable('Add inconsistent.')
         else:
             cls.log.debug(format('cannot unpack: {0!s}', 
                                  possible.__class__))
             raise Unsuitable('Not a container.')
     return (use, regexps)
Example #3
 def clone_dfs(use, original, first, start, stop, rest=None):
     '''
     We only convert DFS if start=0 or 1, stop=1 or None and first and 
     rest are both regexps.
     
     This forces use=True as it is likely that a regexp is a gain.
     '''
     assert not isinstance(original, Transformable)
     try:
         if start not in (0, 1) or stop not in (1, None):
             raise Unsuitable()
         (use, [first, rest]) = \
                 RegexpContainer.to_regexps(True, [first, rest])
         # we need to be careful here to get the depth first bit right
         if stop is None:
             regexp = Sequence([first, Repeat([rest], alphabet)], alphabet)
             if start == 0:
                 regexp = Choice([regexp, Empty(alphabet)], alphabet)
         else:
             regexp = first
             if start == 0:
                 regexp = Choice([regexp, Empty(alphabet)], alphabet)
         log.debug(format('DFS: cloned {0}', regexp))
         return RegexpContainer.build(original,
                                      regexp,
                                      alphabet,
                                      matcher_type,
                                      use,
                                      add_reqd=stop is None)
     except Unsuitable:
         log.debug(format('DFS: not rewritten: {0}', original))
         return original
Example #4
 def fmt_intervals(self, intervals):
     '''
     Hide unicode chars because of some strange error that occurs with
     Python2.6 on the command line.
     
     This is in StrAlphabet, but for ASCII makes no difference.  Having it
     here helps LineAwareAlphabet work (the whole idea of subclassing
     alphabets etc is not so great).
     '''
     def pretty(c):
         x = self._escape_char(c)
         if len(x) > 1 or 32 <= ord(x) <= 127:
             return str(x)
         elif ord(c) < 0x100:
             return format('\\x{0:02x}', ord(c)) 
         elif ord(c) < 0x10000:
             return format('\\u{0:04x}', ord(c)) 
         else:
             return format('\\U{0:08x}', ord(c)) 
     ranges = []
     if len(intervals) == 1:
         if intervals[0][0] == intervals[0][1]:
             return self._escape_char(intervals[0][0])
         elif intervals[0][0] == self.min and intervals[0][1] == self.max:
             return '.'
     # pylint: disable-msg=C0103
     # (sorry. but i use this (a, b) convention throughout the regexp lib) 
     for (a, b) in intervals:
         if a == b:
             ranges.append(pretty(a))
         else:
             ranges.append(format('{0!s}-{1!s}', pretty(a), pretty(b)))
     return format('[{0}]', self.join(ranges))
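The branches in pretty map a character to itself, or to a \xNN, \uNNNN or
\UNNNNNNNN escape, depending on its code point. A standalone sketch, with an
identity function standing in for self._escape_char (an assumption made for
illustration), shows the expected outputs:

 # Sketch of the pretty() branches above; the identity lambda stands in
 # for self._escape_char.
 def pretty(c, escape_char=lambda c: c):
     x = escape_char(c)
     if len(x) > 1 or 32 <= ord(x) <= 127:
         return str(x)                        # printable ASCII, unchanged
     elif ord(c) < 0x100:
         return '\\x{0:02x}'.format(ord(c))   # e.g. '\n' -> \x0a
     elif ord(c) < 0x10000:
         return '\\u{0:04x}'.format(ord(c))   # e.g. the euro sign -> \u20ac
     else:
         return '\\U{0:08x}'.format(ord(c))   # astral-plane characters

 assert pretty('a') == 'a'
 assert pretty('\n') == '\\x0a'
 assert pretty('\u20ac') == '\\u20ac'
 assert pretty('\U0001f600') == '\\U0001f600'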
Example #5
    def test_random(self):
        '''
        Compares lepl + python expressions.  This runs 'til it fails, and it
        always does fail, because lepl's expressions are guaranteed greedy
        while python's aren't.  This is "normal" (Perl is the same as Python)
        but I cannot fathom why it should be - it seems *harder* to make them
        work that way... 
        '''
        basicConfig(level=DEBUG)
        log = getLogger('lepl.regexp._test.random')
        match_alphabet = '012'
        string_alphabet = '013'
        for _ in range(100):
            expression = random_expression(3, match_alphabet) 
            string = random_string(3, string_alphabet)
            matcher = DfaRegexp(expression)
#            matcher = NfaRegexp(expression)
            matcher.config.no_full_first_match()
            lepl_result = matcher.parse(string)
            if lepl_result:
                lepl_result = lepl_result[0]
            log.debug(format('{0} {1} {2}', expression, string, lepl_result))
            try:
                python_result = compile_(expression).match(string) 
                if python_result:
                    python_result = python_result.group()
                assert lepl_result == python_result, \
                    format('{0} != {1}\n{2} {3}', 
                           lepl_result, python_result, expression, string)
            except:
                (e, v, _t) = exc_info()
                if repr(v) == "error('nothing to repeat',)":
                    pass
                else:
                    raise e
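The docstring's point (lepl's DFA matches greedily, while Python's re takes
the first alternative that succeeds rather than the longest) can be confirmed
with the standard library alone:

 import re

 # Python, like Perl, is leftmost-first: the alternation stops at '0',
 # even though '01' would also match from the same position.
 assert re.match('0|01', '01').group() == '0'
 # A longest-match engine such as DfaRegexp would return '01' here, which
 # is why the comparison above eventually finds a mismatch.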
Example #6
    def fmt_intervals(self, intervals):
        '''
        This must fully describe the data in the intervals (it is used to
        hash the data).
        '''
        def pretty(c):
            x = self._escape_char(c)
            if len(x) > 1 or str(' ') <= str(x) <= str('~'):
                return str(x)
            else:
                return repr(c)[1:-1]

        ranges = []
        if len(intervals) == 1:
            if intervals[0][0] == intervals[0][1]:
                return self._escape_char(intervals[0][0])
            elif intervals[0][0] == self.min and intervals[0][1] == self.max:
                return '.'
        # pylint: disable-msg=C0103
        # (sorry. but i use this (a, b) convention throughout the regexp lib)
        for (a, b) in intervals:
            if a == b:
                ranges.append(self._escape_char(a))
            else:
                ranges.append(format('{0!s}-{1!s}', pretty(a), pretty(b)))
        return format('[{0}]', self.join(ranges))
Example #7
 def __str__(self):
     generator = self.__wrapper()
     if generator:
         return format('{0} ({1:d}/{2:d})', self.__describe,
                       self.order_epoch, self.__last_known_epoch)
     else:
         return format('Empty ref to {0}', self.__describe)
Example #8
 def generator(stream=stream):
     '''
     This creates the sequence of tokens returned by the stream.
     '''
     try:
         while stream:
             try:
                 (terminals, match, stream_after) = tokens.match(stream)
                 if stream_after == stream:
                     raise RuntimeLexerError(
                         'Tokens matched an empty '
                         'string.\nChange your token definitions so that '
                         'they cannot be empty.')
                 else:
                     stream = stream_after
                 log.debug(
                     format('Token: {0!r} {1!r} {2!r}', terminals, match,
                            stream))
                 yield (terminals, match)
             except TypeError:
                 (terminals, _size, stream) = discard.size_match(stream)
                 log.debug(format('Space: {0!r} {1!r}', terminals, discard))
     except TypeError:
         raise RuntimeLexerError(format('No lexer for \'{0}\'.', stream))
     except AttributeError:
         raise RuntimeLexerError(format('No discard for \'{0}\'.', stream))
Example #9
 def __str__(self):
     '''
     Example:
     0: 3, 4; 1: 2; 2(Tk1); 3: [{u'\x00'}-`b-{u'\U0010ffff'}]->3, 1; 
     4: {$}->5, 7; 5: 6; 6($); 7: {^}->10; 8: 9; 9(^); 10: 11; 
     11: [      ]->11, 8
     
     Node 0 leads to 3 and 4 (both empty)
     Node 1 leads to 2 (empty)
     Node 2 is terminal, labelled with "Tk1"
     Node 3 loops back to 3 for a character in the given range, or to 1
     etc.
     '''
     lines = []
     for node in self:
         edges = []
         for (dest, edge) in self.transitions(node):
             edges.append(format('{0}->{1}', edge, dest))
         for dest in self.empty_transitions(node):
             edges.append(str(dest))
         label = '' if self.terminal(node) is None \
                    else format('({0})', self.terminal(node))
         if edges:
             lines.append(
                 format('{0}{1}: {2}', node, label, ', '.join(edges)))
         else:
             lines.append(format('{0}{1}', node, label))
     return '; '.join(lines)
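The summary layout described in the docstring is easy to reproduce with plain
dictionaries. The dict-based graph below is a made-up stand-in (lepl's real
NfaGraph API differs); it only illustrates the string format:

 def describe(transitions, empty_transitions, terminals):
     # transitions: node -> [(dest, edge)]; empty_transitions: node -> [dest];
     # terminals: node -> label (all hypothetical structures)
     lines = []
     for node in sorted(transitions):
         edges = ['{0}->{1}'.format(edge, dest)
                  for (dest, edge) in transitions[node]]
         edges += [str(dest) for dest in empty_transitions.get(node, [])]
         label = '({0})'.format(terminals[node]) if node in terminals else ''
         if edges:
             lines.append('{0}{1}: {2}'.format(node, label, ', '.join(edges)))
         else:
             lines.append('{0}{1}'.format(node, label))
     return '; '.join(lines)

 print(describe({0: [(1, '[a-z]')], 1: [], 2: []}, {1: [2]}, {2: 'Tk1'}))
 # -> 0: [a-z]->1; 1: 2; 2(Tk1)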
Example #10
 def clone_wrapper(use, original, *args, **kargs):
     factory = original.factory
     if factory in map_:
         log.debug(format('Found {0}', factory))
         return map_[factory](use, original, *args, **kargs)
     else:
         log.debug(format('No clone for {0}, {1}', factory, map_.keys()))
         return original
Example #11
 def record_success(count, stream_in, result):
     (value, stream_out) = result
     count_desc = format(' ({0})', count) if count > 1 else ''
     # Python bug #4618
     print(format('{0}{1} = {2}\n    "{3}" -> "{4}"', 
                  name, count_desc, value, 
                  format_stream(stream_in), format_stream(stream_out)), 
           file=out, end=str('\n'))
Example #12
 def nfa(self):
     '''
     Generate a NFA-based matcher.
     '''
     self._debug(format('compiling to nfa: {0}', self))
     graph = NfaGraph(self.alphabet)
     self.expression.build(graph, graph.new_node(), graph.new_node())
     self._debug(format('nfa graph: {0}', graph))
     return NfaPattern(graph, self.alphabet)
Example #13
 def raise_(self, value):
     '''
     Log when enabled.
     '''
     if self.enabled > 0:
         if type(value) is StopIteration:
             self._info(self.fmt_final_result(format('raise {0!r}', value)))
         else:
             self._warn(self.fmt_final_result(format('raise {0!r}', value)))
Example #14
 def before_throw(self, generator, value):
     '''
     Log when enabled.
     '''
     if self.enabled > 0:
         self.generator = generator
         if type(value) is StopIteration:
             self.action = format('stop  ->  {0}', generator)
         else:
             self.action = format('{1!r}  ->  {0}', generator, value)
Example #15
 def clone_and(use, original, *matchers):
     '''
     We can convert an And only if all the sub-matchers have possible
     regular expressions, and even then we must tag the result unless
     an add transform is present.
     '''
     assert isinstance(original, Transformable)
     try:
         # since we're going to require add anyway, we're happy to take
         # other inputs, whether add is required or not.
         (use, regexps) = \
             RegexpContainer.to_regexps(use, matchers, add_reqd=None)
         # if we have regexp sub-expressions, join them
         regexp = Sequence(regexps, alphabet)
         log.debug(format('And: cloning {0}', regexp))
         if use and len(original.wrapper.functions) > 1 \
                 and original.wrapper.functions[0] is add:
             # we have additional functions, so cannot take regexp higher,
             # but use is True, so return a new matcher.
             # hack to copy across other functions
             original.wrapper = \
                     TransformationWrapper(original.wrapper.functions[1:])
             log.debug('And: OK (final)')
             # NEED TEST FOR THIS
             return single(alphabet, original, regexp, matcher_type)
         elif len(original.wrapper.functions) == 1 \
                 and original.wrapper.functions[0] is add:
             # OR JUST ONE?
             # lucky!  we just combine and continue
             log.debug('And: OK')
             return RegexpContainer.build(original,
                                          regexp,
                                          alphabet,
                                          matcher_type,
                                          use,
                                          transform=False)
         elif not original.wrapper:
             # regexp can't return multiple values, so hope that we have
             # an add
             log.debug('And: add required')
             return RegexpContainer.build(original,
                                          regexp,
                                          alphabet,
                                          matcher_type,
                                          use,
                                          add_reqd=True)
         else:
             log.debug(
                 format('And: wrong transformation: {0!r}',
                        original.wrapper))
             return original
     except Unsuitable:
         log.debug(format('And: not rewritten: {0}', original))
         return original
Example #16
 def dfa(self):
     '''
     Generate a DFA-based matcher (faster than NFA, but returns only a
     single, greedy match).
     '''
     self._debug(format('compiling to dfa: {0}', self))
     ngraph = NfaGraph(self.alphabet)
     self.expression.build(ngraph, ngraph.new_node(), ngraph.new_node())
     self._debug(format('nfa graph: {0}', ngraph))
     dgraph = NfaToDfa(ngraph, self.alphabet).dfa
     self._debug(format('dfa graph: {0}', dgraph))
     return DfaPattern(dgraph, self.alphabet)
Example #17
 def __str__(self):
     lines = []
     for node in self:
         edges = []
         for (dest, edge) in self.transitions(node):
             edges.append(format("{0}->{1}", edge, dest))
         nodes = [n for n in self.nfa_nodes(node)]
         edges = " " + ",".join(edges) if edges else ""
         labels = list(self.terminals(node))
         labels = format("({0})", ",".join(str(label) for label in labels)) if labels else ""
         lines.append(format("{0}{1}: {2}{3}", node, labels, nodes, edges))
     return "; ".join(lines)
Example #18
 def __str__(self):
     counts = format('total:      {total:3d}\n'
                     'leaves:     {leaves:3d}\n'
                     'loops:      {loops:3d}\n'
                     'duplicates: {duplicates:3d}\n'
                     'others:     {others:3d}\n'
                     'unhashable: {unhashable:3d}\n', **self.__dict__)
     keys = list(self.types.keys())
     keys.sort(key=repr)
     types = '\n'.join([format('{0:40s}: {1:3d}', key, len(self.types[key]))
                        for key in keys])
     return counts + types
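Unlike the positional templates elsewhere, this one fills named fields from
keyword arguments (**self.__dict__ above). Given the str.format delegation,
the keyword side works as below (the stats dict is invented for illustration):

 stats = {'total': 7, 'leaves': 3}
 print('total: {total:3d}, leaves: {leaves:3d}'.format(**stats))
 # -> total:   7, leaves:   3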
Example #19
 def reusable(self, generator):
     '''
     Check we can re-use the wrapper.
     '''
     wrapped = self.__wrapper()
     if not wrapped:
         assert self.__count == 0, \
             format('GCed but still on stack?! {0}', self.__describe)
         return False
     else:
         assert wrapped is generator, \
             format('Hash collision? {0}/{1}', generator, wrapped)
         return True
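self.__wrapper here behaves like a weakref.ref: calling it returns the
generator while it is alive and None once it has been collected, which is
exactly what the 'if not wrapped' branch detects. The behaviour in miniature:

 import weakref

 class Gen(object):   # stand-in for the wrapped generator
     pass

 gen = Gen()
 ref = weakref.ref(gen)   # plays the role of self.__wrapper
 assert ref() is gen      # alive: the call returns the object
 del gen                  # CPython frees the object immediately; other
 assert ref() is None     # runtimes may defer, so this is a CPython sketch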
Example #20
 def __init__(self, stream):
     try:
         if stream.line_number is None:
             msg = format("The match failed at '{0}',"
                          "\nIndex {1} of {2}.",
                          stream, stream.line_offset, stream.source)
         else:
             msg = format("The match failed at '{0}',"
                          "\nLine {1}, character {2} of {3}.",
                          stream, stream.line_number, stream.line_offset,
                          stream.source)
     except AttributeError:
         msg = format("The match failed at '{0}'.", stream)
     super(FullFirstMatchException, self).__init__(msg)
     self.stream = stream
Example #21
 def clone_or(use, original, *matchers):
     '''
     We can convert an Or only if all the sub-matchers have possible
     regular expressions.
     '''
     assert isinstance(original, Transformable)
     try:
         (use, regexps) = RegexpContainer.to_regexps(use, matchers)
         regexp = Choice(regexps, alphabet)
         log.debug(format('Or: cloned {0}', regexp))
         return RegexpContainer.build(original, regexp, alphabet,
                                      matcher_type, use)
     except Unsuitable:
         log.debug(format('Or not rewritten: {0}', original))
         return original
Example #22
 def new_clone(node, args, kargs):
     type_, ok = None, False
     for parent in self.spec:
         if is_child(node, parent):
             type_ = self.spec[parent]
     if type_:
         ok = True
         for arg in args:
             if isinstance(arg, Matcher) and not \
                     isinstance(arg, NoTrampolineTransformableWrapper):
                 ok = False
         for name in kargs:
             arg = kargs[name]
             if isinstance(arg, Matcher) and not \
                     isinstance(arg, NoTrampolineTransformableWrapper):
                 ok = False
     if not ok:
         type_ = type(node)
     try:
         copy = type_(*args, **kargs)
         copy_standard_attributes(node, copy)
         return copy
     except TypeError as err:
         raise TypeError(format('Error cloning {0} with ({1}, {2}): {3}',
                                type_, args, kargs, err))
Example #23
 def before_send(self, generator, value):
     '''
     Log when enabled.
     '''
     if self.enabled > 0:
         self.generator = generator
         self.action = format('{1!r}  ->  {0}', generator, value)
Example #24
 def before_next(self, generator):
     '''
     Log when enabled.
     '''
     if self.enabled > 0:
         self.generator = generator
         self.action = format('next({0})', generator)
Example #25
 def push_level(self, level):
     '''
     Add a new indent level.
     '''
     self.__stack.append(level)
     self.__state[BlockMonitor] = level
     self._debug(format('Indent -> {0:d}', level))
Example #26
 def size_match(self, stream):
     '''
     Match against the stream, but return the length of the match.
     '''
     state = 0
     size = 0
     longest = (self.__empty_labels, 0, stream) \
                 if self.__empty_labels else None
     while stream:
         future = self.__table[state][stream[0]]
         self._debug(
             format('stream {0!s}: {1} -> {2}', stream[0], state, future))
         if future is None:
             break
         # update state
         (state, terminals) = future
         size += 1
         # it might be faster to use size as an index here - it's a
         # trade-off depending on line length.  probably worth measuring.
         stream = stream[1:]
         # match is strictly increasing, so storing the length is enough
         # (no need to make an expensive copy)
         if terminals:
             longest = (terminals, size, stream)
     return longest
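The loop needs nothing but the transition table. A reduced sketch with a
hypothetical table for the token a+ (the state -> char -> (next state,
terminals) layout is assumed here, not taken from lepl) shows the greedy,
longest-match behaviour:

 def size_match(table, stream, empty_labels=None):
     state, size = 0, 0
     longest = (empty_labels, 0, stream) if empty_labels else None
     while stream:
         future = table[state].get(stream[0])
         if future is None:
             break
         (state, terminals) = future
         size += 1
         stream = stream[1:]
         if terminals:   # remember the last accepting position seen
             longest = (terminals, size, stream)
     return longest

 table = {0: {'a': (1, ['A'])}, 1: {'a': (1, ['A'])}}   # token: a+
 assert size_match(table, 'aab') == (['A'], 2, 'b')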
Example #27
    def __read(self, i, matcher, stream):
        '''
        Either return a value from previous cached values or call the
        embedded generator to get the next value (and then store it).
        '''
        if self.__active:
            raise MemoException(format('''Left recursion with RMemo?
i: {0}
table: {1!r}
stream: {2}/{3} (initially {4})
matcher: {5!s}''', i, self.__table, stream, type(stream),
                   self.__cached_stream, matcher))
        try:
            while i >= len(self.__table) and not self.__stopped:
                try:
                    self.__active = True
                    self.__cached_stream = stream
                    result = yield self.__generator
                finally:
                    self.__active = False
                self.__table.append(result)
        except StopIteration:
            self.__stopped = True
        if i < len(self.__table):
            yield self.__table[i]
        else:
            return  # ends the generator (PEP 479-safe)
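Stripped of the trampolining (the 'result = yield self.__generator' step), the
caching scheme is a grow-on-demand table indexed by result number. A
plain-iterator sketch of the same idea, with invented names:

 class TableMemo(object):
     # Cache successive results of an iterator so index i is computed once.
     def __init__(self, iterator):
         self._iterator = iterator
         self._table = []
         self._stopped = False

     def read(self, i):
         while i >= len(self._table) and not self._stopped:
             try:
                 self._table.append(next(self._iterator))
             except StopIteration:
                 self._stopped = True
         if i < len(self._table):
             return self._table[i]
         raise IndexError(i)

 memo = TableMemo(iter([10, 20, 30]))
 assert memo.read(2) == 30
 assert memo.read(0) == 10   # served from the table, not the iterator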
Example #28
 def __init__(self, conservative=False, left=None, right=None):
     super(AutoMemoize, self).__init__(
         Rewriter.MEMOIZE,
         format('AutoMemoize({0}, {1}, {2})', conservative, left, right))
     self.conservative = conservative
     self.left = left
     self.right = right
Example #29
 def _format_repr(self, indent, key, contents):
     return format('{0}{1}{2}({3}{4})', 
                   ' ' * indent,
                   key + '=' if key else '',
                   self._small_str,
                   '' if self._fmt_compact else '\n',
                   ',\n'.join(contents))
Example #30
 def __repr__(self):
     '''
     Lazily evaluated for speed - saves 1/3 of time spent in constructor
     '''
     if not self.__cached_repr:
         self.__cached_repr = format('{0}({1!r})', self.matcher, self.stream)
     return self.__cached_repr
Example #31
def find_tokens(matcher):
    '''
    Returns a set of Tokens.  Also asserts that children of tokens are
    not themselves Tokens. 
    
    Should we also check that a Token occurs somewhere on every path to a
    leaf node?
    '''
    (tokens, visited, non_tokens) = (set(), set(), set())
    stack = deque([matcher])
    while stack:
        matcher = stack.popleft()
        if matcher not in visited:
            if is_child(matcher, NonToken):
                non_tokens.add(matcher)
            if isinstance(matcher, BaseToken):
                tokens.add(matcher)
                if matcher.content:
                    assert_not_token(matcher.content, visited)
            else:
                for child in matcher:
                    if isinstance(child, Matcher):
                        stack.append(child)
            visited.add(matcher)
    if tokens and non_tokens:
        raise LexerError(
            format(
                'The grammar contains a mix of Tokens and non-Token '
                'matchers at the top level. If Tokens are used then '
                'non-token matchers that consume input must only '
                'appear "inside" Tokens.  The non-Token matchers '
                'include: {0}.', '; '.join(str(n) for n in non_tokens)))
    return tokens
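The traversal itself is a standard visited-set walk over the matcher graph
(despite the name 'stack', popleft makes it breadth-first). The shape, reduced
to essentials:

 from collections import deque

 def walk(root, children):
     # visited-set traversal as in find_tokens; popleft => breadth-first
     visited = set()
     queue = deque([root])
     while queue:
         node = queue.popleft()
         if node not in visited:
             visited.add(node)
             queue.extend(children(node))
     return visited

 graph = {'a': ['b', 'c'], 'b': ['c'], 'c': ['a']}   # a cycle is safe
 assert walk('a', lambda n: graph[n]) == {'a', 'b', 'c'}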
Example #32
 def extension(self, text):
     """
      This is called for extensions of the form (*NAME), where NAME is any
     sequence of capitals.  It should return a character range.  Further
     uses of (*...) are still to be decided.
     """
      raise RegexpError(format("Extension {0!r} not supported by {1!s}",
                               text, self.__class__))
Example #33
 def clone_transform(use,
                     original,
                     matcher,
                     wrapper,
                     _raw=False,
                     _args=False):
     '''
     We can assume that wrapper is a transformation.  add joins into
     a sequence.
     '''
     assert isinstance(wrapper, TransformationWrapper)
     try:
         # this is the only place add is required
         (use, [regexp]) = RegexpContainer.to_regexps(use, [matcher],
                                                      add_reqd=True)
         log.debug(format('Transform: cloning {0}', regexp))
         if use and len(wrapper.functions) > 1 \
                 and wrapper.functions[0] is add:
             # we have additional functions, so cannot take regexp higher,
             # but use is True, so return a new matcher.
             # hack to copy across other functions
             original.wrapper = \
                 TransformationWrapper().extend(wrapper.functions[1:])
             log.debug('Transform: OK (final)')
             # NEED TEST FOR THIS
             return single(alphabet, original, regexp, matcher_type)
         elif len(wrapper.functions) == 1 and wrapper.functions[0] is add:
             # exactly what we wanted!  combine and continue
             log.debug('Transform: OK')
             return RegexpContainer.build(original,
                                          regexp,
                                          alphabet,
                                          matcher_type,
                                          use,
                                          transform=False)
         elif not wrapper:
             # we're just forwarding the add_reqd from before here
             log.debug('Transform: empty, add required')
             return RegexpContainer(original, regexp, use, add_reqd=True)
         else:
             log.debug(
                 format('Transform: wrong transformation: {0!r}',
                        original.wrapper))
             return original
     except Unsuitable:
         log.debug(format('Transform: not rewritten: {0}', original))
         return original
Example #34
 def __args_as_attributes(self):
     '''
     Validate the arguments passed to the constructor against the spec for 
     the factory (necessary because we use *args and so the user doesn't
     get the feedback they will expect if they make a mistake).  As a side
      effect we also associate arguments with names and expand defaults
     so that attributes are more predictable.
     '''
     try:
         # function wrapper, so we have two levels, and we must construct
         # a new, empty function
         def empty(): return
         document(empty, self.factory.factory)
         spec = getargspec(empty)
     except:
         spec = getargspec(self.factory)
     names = list(spec.args)
      defaults = dict(zip(names[::-1],
                          spec.defaults[::-1] if spec.defaults else []))
     for name in names:
         if name in self.__kargs:
             self._karg(**{name: self.__kargs[name]})
             del self.__kargs[name]
         elif self.__args:
             self._arg(**{name: self.__args[0]})
             self.__args = self.__args[1:]
         elif name in defaults:
             self._karg(**{name: defaults[name]})
         else:
             raise TypeError(format("No value for argument '{0}' in "
                                    "{1}(...)", 
                                    name, self._small_str))
     if self.__args:
         if spec.varargs:
             self._args(**{spec.varargs: self.__args})
         else:
             raise TypeError(format("No parameter matches the argument "
                                    "{0!r} in {1}(...)", 
                                    self.__args[0], self._small_str))
     if self.__kargs:
         if spec.keywords:
              self._kargs(**{spec.keywords: self.__kargs})
         else:
             name = list(self.__kargs.keys())[0]
             value = self.__kargs[name]
             raise TypeError(format("No parameter matches the argument "
                                    "{0}={1!r} in {2}(...)", 
                                    name, value, self._small_str))
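The binding logic can be reproduced with the inspect module. This sketch uses
getfullargspec (the modern replacement for the getargspec seen above, an
assumption about porting) and returns the bound mapping instead of setting
attributes:

 from inspect import getfullargspec

 def bind_args(func, args, kargs):
     # Associate positional args with parameter names and expand defaults,
     # mirroring the loop above but returning a dict.
     spec = getfullargspec(func)
     names = list(spec.args)
     defaults = dict(zip(names[::-1],
                         spec.defaults[::-1] if spec.defaults else []))
     bound, rest = dict(kargs), list(args)
     for name in names:
         if name in bound:
             continue
         elif rest:
             bound[name] = rest.pop(0)
         elif name in defaults:
             bound[name] = defaults[name]
         else:
             raise TypeError("No value for argument '{0}'".format(name))
     return bound

 def target(a, b, c=3):
     return None

 assert bind_args(target, (1,), {'b': 2}) == {'a': 1, 'b': 2, 'c': 3}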