def test_single_value(self):
    '''
    A stream built from a single item gives that item back from both
    s_next and s_line, and the stream that follows is empty.
    '''
    factory = DEFAULT_STREAM_FACTORY
    cases = (
        (factory.from_sequence, 'a'),
        (factory.from_sequence, [1]),
        (factory.from_sequence, (2,)),
        (factory.from_string, 'b'),
        (factory.from_list, ['c']),
    )
    for (build, source) in cases:
        stream = build(source)
        assert not s_empty(stream)
        # reading one item consumes the whole stream
        (head, after_head) = s_next(stream)
        assert head == source
        assert s_empty(after_head)
        # reading a line from the start does the same
        (whole, after_line) = s_line(stream, False)
        assert whole == source
        assert s_empty(after_line)
def test_single_value(self):
    '''
    Check one-item streams from each factory constructor: the single
    read (by item or by line) returns the source data and leaves an
    empty stream.
    '''
    make = DEFAULT_STREAM_FACTORY
    samples = (
        (make.from_sequence, 'a'),
        (make.from_sequence, [1]),
        (make.from_sequence, (2,)),
        (make.from_string, 'b'),
        (make.from_list, ['c']),
    )
    for (constructor, payload) in samples:
        single = constructor(payload)
        assert not s_empty(single)

        # item-wise read
        (item, rest_after_item) = s_next(single)
        assert item == payload
        assert s_empty(rest_after_item)

        # line-wise read, again from the original stream
        (text, rest_after_line) = s_line(single, False)
        assert text == payload
        assert s_empty(rest_after_line)
def _tokens(self, stream, max):
    '''
    Generate tokens, on demand.

    Yields (terminals, token stream) pairs.  At each position the token
    regexp is tried first; if that raises TypeError the space regexp is
    used instead to move past the text.  A TypeError that escapes the loop
    is reported as a RuntimeLexerError describing the current stream.
    '''
    try:
        token_id = s_id(stream)
        while not s_empty(stream):
            # a fresh id per step avoids conflicts between tokens
            token_id += 1
            try:
                (terminals, text, remainder) = self.t_regexp.match(stream)
                message = fmt('Token: {0!r} {1!r} {2!s}',
                              terminals, text, s_debug(stream))
                self._debug(message)
                token_stream = s_stream(stream, text, max=max, id_=token_id)
                yield (terminals, token_stream)
            except TypeError:
                # no token here; consume whitespace instead
                (terminals, _size, remainder) = \
                    self.s_regexp.size_match(stream)
                message = fmt('Space: {0!r} {1!s}',
                              terminals, s_debug(stream))
                self._debug(message)
            stream = remainder
    except TypeError:
        description = s_fmt(
            stream, 'No token for {rest} at {location} of {text}.')
        raise RuntimeLexerError(description)
def _tokens(self, stream, max):
    '''
    Lazily produce (terminals, token stream) pairs from the stream.

    The token regexp is attempted first at each position.  When that
    attempt raises TypeError, the space regexp is applied so the lexer can
    skip ahead.  If a TypeError propagates out of the loop the failure is
    converted to a RuntimeLexerError formatted against the current stream.
    '''
    try:
        counter = s_id(stream)
        while not s_empty(stream):
            # bump the id each time so tokens do not conflict
            counter += 1
            try:
                (terminals, matched_text, following) = \
                    self.t_regexp.match(stream)
                self._debug(fmt('Token: {0!r} {1!r} {2!s}',
                                terminals, matched_text, s_debug(stream)))
                yield (terminals,
                       s_stream(stream, matched_text, max=max, id_=counter))
            except TypeError:
                # fall back to matching (and discarding) space
                (terminals, _size, following) = \
                    self.s_regexp.size_match(stream)
                self._debug(fmt('Space: {0!r} {1!s}',
                                terminals, s_debug(stream)))
            stream = following
    except TypeError:
        error_text = s_fmt(
            stream, 'No token for {rest} at {location} of {text}.')
        raise RuntimeLexerError(error_text)
def test_two_values(self):
    '''
    A two-item stream gives its items one at a time through s_next, and
    gives the whole source through a single s_line call.
    '''
    factory = DEFAULT_STREAM_FACTORY
    cases = (
        (factory.from_sequence, 'ab'),
        (factory.from_sequence, [1, 2]),
        (factory.from_sequence, (2, 3)),
        (factory.from_string, 'bc'),
        (factory.from_list, ['c', 6]),
    )
    for (build, source) in cases:
        stream = build(source)
        assert not s_empty(stream)
        # items come back as length-one slices of the source
        (first, after_first) = s_next(stream)
        assert first == source[:1]
        (second, after_second) = s_next(after_first)
        assert second == source[1:2]
        assert s_empty(after_second)
        # a line read from the start returns everything
        (whole, after_line) = s_line(stream, False)
        assert whole == source
        assert s_empty(after_line)
def _tokens(self, stream, max):
    '''
    Generate tokens, on demand, one line of the stream at a time.

    For every line read from the stream this yields, in order:

    - a (START,) token whose stream holds the leading indent (empty
      unless self.blocks is set and the space regexp matches), with tabs
      replaced by self._tab;

    - one (terminals, stream) pair for each token regexp match in the
      rest of the line (text where the token regexp raises TypeError is
      consumed using the space regexp instead);

    - an (END,) token with empty contents.

    A TypeError that escapes the loop is converted to a RuntimeLexerError
    formatted against the current stream.
    '''
    id_ = s_id(stream)
    try:
        while not s_empty(stream):

            # caches for different tokens with same contents differ
            id_ += 1
            # split off the current line; the rest is handled next time
            (line, next_stream) = s_line(stream, False)
            line_stream = s_stream(stream, line)
            size = 0
            # if we use blocks, match leading space
            if self.blocks:
                try:
                    (_, size, _) = self.s_regexp.size_match(line_stream)
                except TypeError:
                    # no leading space matched; size stays at zero
                    pass
            # this will be empty (size=0) if blocks unused
            (indent, next_line_stream) = s_next(line_stream, count=size)
            indent = indent.replace('\t', self._tab)
            yield ((START,),
                   s_stream(line_stream, indent, id_=id_, max=max))
            line_stream = next_line_stream

            # tokenise the remainder of the line
            while not s_empty(line_stream):
                id_ += 1
                try:
                    (terminals, match, next_line_stream) = \
                        self.t_regexp.match(line_stream)
                    yield (terminals, s_stream(line_stream, match,
                                               max=max, id_=id_))
                except TypeError:
                    # not a token here, so consume space instead; if this
                    # also raises TypeError it reaches the outer handler
                    (terminals, _size, next_line_stream) = \
                        self.s_regexp.size_match(line_stream)
                line_stream = next_line_stream

            # close the line with an empty END token
            id_ += 1
            yield ((END,),
                   s_stream(line_stream, '', max=max, id_=id_))
            stream = next_stream

    except TypeError:
        raise RuntimeLexerError(
            s_fmt(stream,
                  'No token for {rest} at {location} of {text}.'))
def Eof(support, stream):
    '''
    Match the end of a stream.

    When the stream is empty the result is an empty list paired with the
    unchanged stream; otherwise nothing (None) is returned.

    This is also aliased to Eos in lepl.derived.
    '''
    if not s_empty(stream):
        return None
    return ([], stream)
def test_two_values(self):
    '''
    Check two-item streams from each factory constructor: two successive
    item reads return the first and second slices and exhaust the stream,
    while one line read returns the complete source.
    '''
    make = DEFAULT_STREAM_FACTORY
    samples = (
        (make.from_sequence, 'ab'),
        (make.from_sequence, [1, 2]),
        (make.from_sequence, (2, 3)),
        (make.from_string, 'bc'),
        (make.from_list, ['c', 6]),
    )
    for (constructor, payload) in samples:
        pair = constructor(payload)
        assert not s_empty(pair)

        # step through item by item
        (item, tail) = s_next(pair)
        assert item == payload[:1]
        (item, tail) = s_next(tail)
        assert item == payload[1:2]
        assert s_empty(tail)

        # read everything as one line from the original stream
        (text, after_text) = s_line(pair, False)
        assert text == payload
        assert s_empty(after_text)
def Eof(support, stream):
    '''
    Match the end of a stream.

    Returns an empty result list together with the stream when the stream
    is empty, and None in every other case.

    This is also aliased to Eos in lepl.derived.
    '''
    return ([], stream) if s_empty(stream) else None
def line(self, state, empty_ok):
    '''
    Read a line, given a state of the form (cons, line_stream).

    If the current line stream is empty, move to cons.tail and build the
    next line stream with self._next_line before reading.  Returns the
    line value together with a new stream, ((cons, remaining), self).

    If StopIteration is raised while doing this, it is re-raised when
    empty_ok is false and replaced by a TypeError when empty_ok is true.
    '''
    try:
        (cons, current) = state
        if s_empty(current):
            # current line exhausted, so advance to the following one
            cons = cons.tail
            current = self._next_line(cons, current)
        (text, remainder) = s_line(current, empty_ok)
    except StopIteration:
        if not empty_ok:
            raise
        raise TypeError('Iterable stream cannot return an empty line')
    return (text, ((cons, remainder), self))
def test_all(self):
    '''
    Walk through a three-line iterable stream: read by line and by
    character, check location formatting and debug output, branch a
    separate stream from the first line, and finally check the deepest
    position reached.
    '''
    source = iter(['first line', 'second line', 'third line'])
    factory = DEFAULT_STREAM_FACTORY
    start = factory(source)

    # just created
    assert not s_empty(start)

    # get first line
    (first_line, at_second) = s_line(start, False)
    assert 'first line' == first_line, first_line

    # get first character of next line
    (char, in_second) = s_next(at_second)
    assert char == 's', char

    # and test fmtting
    where = s_fmt(in_second, '{location}: {rest}')
    assert where == "line 2, character 2: 'econd line'", where

    # then get rest of second line
    (chunk, at_third) = s_next(in_second, count=len('econd line'))
    assert chunk == 'econd line', chunk
    summary = s_debug(in_second)
    assert summary == "1:'e'", summary

    # and move on to third line
    (char, in_third) = s_next(at_third)
    assert char == 't', char
    (char, deeper_in_third) = s_next(in_third)
    assert char == 'h', char

    # now try branching (think tokens) at line 1
    branch = s_stream(at_second, first_line)
    (first_line, after_branch_line) = s_line(branch, False)
    assert first_line == 'first line', first_line
    assert not s_empty(after_branch_line)
    (char, in_branch) = s_next(branch)
    assert char == 'f', char
    summary = s_debug(in_branch)
    assert summary == "1:'i'", summary

    # finally look at max depth (which was after 'h' in third line)
    deepest = s_deepest(start)
    where = s_fmt(deepest, '{location}: {rest}')
    assert where == "line 3, character 3: 'ird line'", where
def test_empty(self):
    '''
    Streams built from empty data report themselves as empty, and both
    s_next and s_line raise StopIteration on them.
    '''
    factory = DEFAULT_STREAM_FACTORY
    cases = (
        (factory.from_sequence, ''),
        (factory.from_sequence, []),
        (factory.from_sequence, ()),
        (factory.from_string, ''),
        (factory.from_list, []),
    )
    for (build, source) in cases:
        stream = build(source)
        assert s_empty(stream)

        # reading an item must fail
        try:
            s_next(stream)
        except StopIteration:
            pass
        else:
            assert False, fmt('expected error: {0}', stream)

        # and so must reading a line
        try:
            s_line(stream, False)
        except StopIteration:
            pass
        else:
            assert False, fmt('expected error: {0}', stream)
def _matcher(support, stream1):
    '''
    Run the wrapped matcher, raising FullFirstMatchException if the first
    match fails (StopIteration) or, when eos is set, leaves a non-empty
    stream.  Later matches are passed through unchanged.
    '''
    # set default maxdepth
    s_next(stream1, count=0)

    # first match
    pending = matcher._match(stream1)
    try:
        (first_result, first_rest) = yield pending
        if not eos or s_empty(first_rest):
            yield (first_result, first_rest)
        else:
            raise FullFirstMatchException(first_rest)
    except StopIteration:
        raise FullFirstMatchException(stream1)

    # subsequent matches: request the next result and hand it straight on
    while True:
        yield (yield pending)
def next(self, state, count=1):
    '''
    Read count items, given a state of the form (cons, line_stream).

    Returns (value, new stream).  When the current line cannot supply
    count items (s_next raises StopIteration) the read continues into the
    line built from cons.tail, joining the pieces with s_join.
    '''
    (cons, current) = state
    try:
        (taken, remainder) = s_next(current, count=count)
    except StopIteration:
        # the general approach here is to take what we can from the
        # current line, create the next, and take the rest from that.
        # of course, that may also not have enough, in which case it
        # will recurse.
        cons = cons.tail
        if s_empty(current):
            # nothing left on this line: read entirely from the next
            following = self._next_line(cons, current)
            return s_next(((cons, following), self), count=count)
        (partial, line_end) = s_line(current, False)
        following = self._next_line(cons, line_end)
        (extra, final_stream) = s_next(((cons, following), self),
                                       count=count - len(partial))
        return (s_join(current, partial, extra), final_stream)
    else:
        return (taken, ((cons, remainder), self))
def next(self, state, count=1):
    '''
    Take count items from a (cons, line_stream) state.

    The common case reads directly from the current line stream.  If that
    raises StopIteration, whatever remains of the current line is combined
    (via s_join) with items read from the line that follows cons.tail.
    Returns a (value, stream) pair.
    '''
    (cons, this_line) = state
    try:
        (value, rest_of_line) = s_next(this_line, count=count)
        return (value, ((cons, rest_of_line), self))
    except StopIteration:
        # the general approach here is to take what we can from the
        # current line, create the next, and take the rest from that.
        # of course, that may also not have enough, in which case it
        # will recurse.
        cons = cons.tail
        line_is_spent = s_empty(this_line)
        if line_is_spent:
            upcoming = self._next_line(cons, this_line)
            continued = ((cons, upcoming), self)
            return s_next(continued, count=count)
        (available, spent_line) = s_line(this_line, False)
        upcoming = self._next_line(cons, spent_line)
        continued = ((cons, upcoming), self)
        shortfall = count - len(available)
        (extra, final_stream) = s_next(continued, count=shortfall)
        combined = s_join(this_line, available, extra)
        return (combined, final_stream)
def _match(self, stream):
    '''
    On matching we first assert that the token type is correct and then
    delegate to the content.

    Raises LexerError if the token has not been compiled.  Without
    content, the whole token text is yielded as a single-item result.
    With content, each result from the content matcher is yielded only if
    it consumed all of the token text or self.complete is not set.
    '''
    if not self.compiled:
        message = fmt('A {0} token has not been compiled. '
                      'You must use the lexer rewriter with Tokens. '
                      'This can be done by using matcher.config.lexer().',
                      self.__class__.__name__)
        raise LexerError(message)

    ((tokens, line_stream), next_stream) = s_next(stream)

    # wrong token type: no matches at all
    if self.id_ not in tokens:
        return

    if self.content is None:
        # result contains all data (use s_next not s_line to set max)
        (text, _) = s_line(line_stream, True)
        (text, _) = s_next(line_stream, count=len(text))
        yield ([text], next_stream)
        return

    inner = self.content._match(line_stream)
    while True:
        (result, remaining) = yield inner
        if s_empty(remaining) or not self.complete:
            yield (result, next_stream)
def match(self, stream): ''' Use the table to match a stream. The stack holds the current state, which is consumed from left to right. An entry on the stack contains: - map_ - a map from character to [(dest state, terminals)] - matched - the [(dest state, terminals)] generated by the map for a given character - empties - empty transitions for this state - match - the current match, as a list of tokens consumed from the stream - stream - the current stream ''' #self._debug(str(self.__table)) stack = deque() (map_, empties) = self.__table[0] stack.append((map_, None, empties, [], stream)) while stack: #self._debug(str(stack)) (map_, matched, empties, match, stream) = stack.pop() if not map_ and not matched and not empties: # if we have no more transitions, drop pass elif map_: # re-add empties with old match stack.append((None, None, empties, match, stream)) # and try matching a character if not s_empty(stream): (value, next_stream) = s_next(stream) try: matched = map_[value] if matched: stack.append((None, matched, None, match + [value], next_stream)) except IndexError: pass elif matched: (dest, terminal) = matched[-1] # add back reduced matched if len(matched) > 1: # avoid discard iteration stack.append((map_, matched[:-1], empties, match, stream)) # and expand this destination (map_, empties) = self.__table[dest] stack.append((map_, None, empties, match, stream)) if terminal: yield (terminal, self.__alphabet.join(match), stream) else: # we must have an empty transition (dest, terminal) = empties[-1] # add back reduced empties if len(empties) > 1: # avoid discard iteration stack.append((map_, matched, empties[:-1], match, stream)) # and expand this destination (map_, empties) = self.__table[dest] stack.append((map_, None, empties, match, stream)) if terminal: yield (terminal, self.__alphabet.join(match), stream)