def test_string_lines(self):
    f = DEFAULT_STREAM_FACTORY
    s = f.from_string('line 1\nline 2\nline 3\n')
    # consume the first two lines
    (l, s) = s_line(s, False)
    assert l == 'line 1\n', l
    (l, _) = s_line(s, False)
    assert l == 'line 2\n', repr(l)
    # the stream is now positioned at the start of line 2
    locn = s_fmt(s, '{location}')
    assert locn == 'line 2, character 1', locn
    # a substream built from the line still tracks locations in the original
    sl = s_stream(s, l)
    (_, sl) = s_next(sl, count=2)
    locn = s_fmt(sl, '{location}')
    assert locn == 'line 2, character 3', locn

def _tokens(self, stream, max):
    '''
    Generate tokens, on demand.
    '''
    try:
        id_ = s_id(stream)
        while not s_empty(stream):
            # avoid conflicts between tokens
            id_ += 1
            try:
                (terminals, match, next_stream) = \
                    self.t_regexp.match(stream)
                self._debug(fmt('Token: {0!r} {1!r} {2!s}',
                                terminals, match, s_debug(stream)))
                yield (terminals, s_stream(stream, match, max=max, id_=id_))
            except TypeError:
                (terminals, _size, next_stream) = \
                    self.s_regexp.size_match(stream)
                self._debug(fmt('Space: {0!r} {1!s}',
                                terminals, s_debug(stream)))
            stream = next_stream
    except TypeError:
        raise RuntimeLexerError(
            s_fmt(stream,
                  'No token for {rest} at {location} of {text}.'))

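# A minimal usage sketch, not from the source: it assumes `lexer` is an
# object exposing the `_tokens` generator above (with compiled `t_regexp`
# and `s_regexp` tables) and that `None` is acceptable for the `max`
# argument threaded through to s_stream; DEFAULT_STREAM_FACTORY and
# s_debug are the same helpers exercised in the tests here.
def dump_tokens(lexer, text):
    stream = DEFAULT_STREAM_FACTORY.from_string(text)
    for (terminals, token_stream) in lexer._tokens(stream, None):
        # each yielded pair is (terminal names, token substream)
        print(terminals, s_debug(token_stream))
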
def test_all(self):
    lines = iter(['first line', 'second line', 'third line'])
    f = DEFAULT_STREAM_FACTORY
    s1 = f(lines)
    # just created
    assert not s_empty(s1)
    # get first line
    (l1, s2) = s_line(s1, False)
    assert 'first line' == l1, l1
    # get first character of next line
    (c21, s21) = s_next(s2)
    assert c21 == 's', c21
    # and test formatting
    locn = s_fmt(s21, '{location}: {rest}')
    assert locn == "line 2, character 2: 'econd line'", locn
    # then get rest of second line
    (c22, s3) = s_next(s21, count=len('econd line'))
    assert c22 == 'econd line', c22
    d = s_debug(s21)
    assert d == "1:'e'", d
    # and move on to third line
    (c31, s31) = s_next(s3)
    assert c31 == 't', c31
    (c32, s32) = s_next(s31)
    assert c32 == 'h', c32
    # now try branching (think tokens) at line 1
    s10 = s_stream(s2, l1)
    (l1, s20) = s_line(s10, False)
    assert l1 == 'first line', l1
    assert not s_empty(s20)
    (c1, s11) = s_next(s10)
    assert c1 == 'f', c1
    d = s_debug(s11)
    assert d == "1:'i'", d
    # finally look at max depth (which was after 'h' in third line)
    m = s_deepest(s1)
    locn = s_fmt(m, '{location}: {rest}')
    assert locn == "line 3, character 3: 'ird line'", locn

def _tokens(self, stream, max):
    '''
    Generate tokens, on demand.
    '''
    id_ = s_id(stream)
    try:
        while not s_empty(stream):
            # caches for different tokens with same contents differ
            id_ += 1
            (line, next_stream) = s_line(stream, False)
            line_stream = s_stream(stream, line)
            size = 0
            # if we use blocks, match leading space
            if self.blocks:
                try:
                    (_, size, _) = self.s_regexp.size_match(line_stream)
                except TypeError:
                    pass
            # this will be empty (size=0) if blocks unused
            (indent, next_line_stream) = s_next(line_stream, count=size)
            indent = indent.replace('\t', self._tab)
            yield ((START,),
                   s_stream(line_stream, indent, id_=id_, max=max))
            line_stream = next_line_stream
            while not s_empty(line_stream):
                id_ += 1
                try:
                    (terminals, match, next_line_stream) = \
                        self.t_regexp.match(line_stream)
                    yield (terminals,
                           s_stream(line_stream, match, max=max, id_=id_))
                except TypeError:
                    (terminals, _size, next_line_stream) = \
                        self.s_regexp.size_match(line_stream)
                line_stream = next_line_stream
            id_ += 1
            yield ((END,),
                   s_stream(line_stream, '', max=max, id_=id_))
            stream = next_stream
    except TypeError:
        raise RuntimeLexerError(
            s_fmt(stream,
                  'No token for {rest} at {location} of {text}.'))

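# A simplified, self-contained model (an assumption-labelled sketch, not
# LEPL itself) of the framing above: every physical line is wrapped in a
# START token carrying the indent and a zero-width END token, with the
# ordinary tokens in between; plain strings and a stock regex stand in
# for the stream helpers and the compiled token tables.
import re

def framed_tokens(text, token_re=re.compile(r'\S+')):
    for line in text.splitlines():
        indent = line[:len(line) - len(line.lstrip())]
        yield ('START', indent)             # indent token, as yielded above
        for match in token_re.finditer(line):
            yield ('TOKEN', match.group())  # body tokens from t_regexp
        yield ('END', '')                   # zero-width end-of-line marker

# list(framed_tokens('if x:\n    y\n'))
# -> [('START', ''), ('TOKEN', 'if'), ('TOKEN', 'x:'), ('END', ''),
#     ('START', '    '), ('TOKEN', 'y'), ('END', '')]
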
def match(support, stream):
    # we increment id_ so that different strings (which might overlap or
    # be contiguous) don't affect each other's memoisation (the hash key
    # is based on offset, and 'one past the end' of one column can have
    # the same offset as the start of the next).
    id_ = s_id(stream)
    # extract a line
    (line, next_stream) = s_line(stream, False)
    line_stream = s_stream(stream, line)
    results = []
    for ((left, right), matcher) in zip(indices, matchers):
        id_ += 1
        # extract the word at [left, right) in the line
        (_, left_aligned_line_stream) = s_next(line_stream, count=left)
        (word, _) = s_next(left_aligned_line_stream, count=right - left)
        support._debug(fmt('Columns {0}-{1} {2!r}', left, right, word))
        word_stream = s_stream(left_aligned_line_stream, word, id_=id_)
        # do the match
        support._debug(s_fmt(word_stream, 'matching {rest}'))
        (result, _) = yield matcher._match(word_stream)
        results.extend(result)
    support._debug(repr(results))
    yield (results, next_stream)

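# A simplified illustration (an assumption-labelled sketch, not the real
# matcher) of the column slicing above: each (left, right) pair cuts a
# fixed-width word out of the same line before it is handed to its
# sub-matcher, here with plain string slicing standing in for
# s_next/s_stream.
def column_words(line, indices):
    return [line[left:right] for (left, right) in indices]

# column_words('1990  10.5  apple ', [(0, 4), (6, 10), (12, 18)])
# -> ['1990', '10.5', 'apple ']
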
def __init__(self, stream):
    super(FullFirstMatchException, self).__init__(
        s_fmt(s_deepest(stream),
              'The match failed in {filename} at {rest} ({location}).'))

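# A minimal usage sketch, assuming LEPL's documented behaviour that a
# failed parse under the default (full first match) configuration raises
# this exception; the Integer matcher and the exact message shown are
# illustrative, not taken from the source.
from lepl import Integer, FullFirstMatchException

try:
    Integer().parse('hello')
except FullFirstMatchException as error:
    # e.g. The match failed in <string> at 'hello' (line 1, character 1).
    print(error)
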
def fmt(self, state, template, prefix='', kargs=None):
    line_stream = state_to_line_stream(state)
    return s_fmt(line_stream, template, prefix=prefix, kargs=kargs)