def match(support, stream):
    (line, _) = s_line(stream, True)
    match = pattern.match(line)
    if match:
        eaten = len(match.group())
        if match.groups():
            return (list(match.groups()), s_next(stream, count=eaten)[1])
        else:
            return ([match.group()], s_next(stream, count=eaten)[1])

def end_group(self, number, offset):
    assert isinstance(number, int)
    assert number in self.__offsets, 'Unopened group: ' + str(number)
    self.__str = None
    (_, stream) = s_next(self.__stream, self.__offsets[number])
    (text, _) = s_next(stream, offset - self.__offsets[number])
    self.__groups[number] = (text, self.__offsets[number], offset)
    del self.__offsets[number]
    if number:  # avoid group 0
        self.__last_index = number

def _final(self):
    '''Is the current character the last?'''
    if not self._excess:
        try:
            (_, stream) = s_next(self._stream)
            try:
                s_next(stream)
            except StopIteration:
                return True
        except StopIteration:
            pass
    return False

def _advance(self, delta=1):
    '''
    Move forwards in the stream.

    I've tried to optimise for the common (delta=1) case.

    The following conventions are followed:
    - `offset` is the offset from the initial input
    - `stream` is the stream starting at the current location
    - `next_stream` is the stream after current
    - `current` is the character at the current location
    - `previous` is the character just before the current location
    - `excess` is the amount by which we advanced past the end

    If `excess` is set, streams should not be used.
    '''
    assert delta >= 0
    self._offset += delta
    if self._excess:
        self._excess += delta
        self._previous = None
    elif delta == 1:
        self._stream = self._next_stream
        self._previous = self._current
        try:
            (self._current, self._next_stream) = s_next(self._next_stream)
        except StopIteration:
            self._current = None
            self._next_stream = None
            self._excess = 1
    elif delta:
        old_stream = self._stream
        try:
            (advanced, self._stream) = s_next(old_stream, delta)
            self._previous = advanced[-1:]
            try:
                (self._current, self._next_stream) = s_next(self._stream)
            except StopIteration:
                self._current = None
                self._next_stream = None
                self._excess = 1
        except StopIteration:
            self._stream = None
            self._next_stream = None
            self._current = None
            self._previous = None
            self._excess = delta - s_len(old_stream) + 1
    return True

def _match(self, stream_in):
    '''
    Pull indent and call the policy and update the global value,
    then evaluate the contents.
    '''
    # detect a nested call
    key = s_key(stream_in)
    if key in self.__streams:
        self._debug('Avoided left recursive call to Block.')
        return
    self.__streams.add(key)
    try:
        ((tokens, token_stream), _) = s_next(stream_in)
        (indent, _) = s_line(token_stream, True)
        if START not in tokens:
            raise StopIteration
        current = self.__monitor.indent
        policy = self.policy(current, indent)
        generator = And(*self.lines)._match(stream_in)
        while True:
            self.__monitor.push_level(policy)
            try:
                results = yield generator
            finally:
                self.__monitor.pop_level()
            yield results
    finally:
        self.__streams.remove(key)

def size_match(self, stream):
    '''
    Match against the stream, but return the length of the match.
    '''
    state = 0
    size = 0
    longest = (self.__empty_labels, 0, stream) \
        if self.__empty_labels else None
    (line, _) = s_line(stream, True)
    while size < len(line):
        future = self.__table[state][line[size]]
        if future is None:
            break
        # update state
        (state, terminals) = future
        size += 1
        # match is strictly increasing, so storing the length is enough
        # (no need to make an expensive copy)
        if terminals:
            try:
                (_, next_stream) = s_next(stream, count=size)
                longest = (terminals, size, next_stream)
            except StopIteration:
                pass
    return longest

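# A stripped-down, hypothetical sketch of the longest-match loop above, with
# a literal transition table standing in for self.__table:
# state -> {char: (next state, terminals)}; terminals mark accepting states.

def toy_size_match(line):
    table = {0: {'a': (1, ['A'])},     # 'a' is a complete match, labelled A
             1: {'b': (2, None)},      # 'ab' is only a prefix...
             2: {'c': (3, ['ABC'])}}   # ...'abc' is a longer match
    state = 0
    longest = None
    for (size, char) in enumerate(line, start=1):
        future = table.get(state, {}).get(char)
        if future is None:
            break
        (state, terminals) = future
        if terminals:  # matches are strictly increasing; keep the latest
            longest = (terminals, size)
    return longest

assert toy_size_match('ab!') == (['A'], 1)     # falls back to shorter match
assert toy_size_match('abcd') == (['ABC'], 3)  # prefers the longest match
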
def test_two_values(self):
    f = DEFAULT_STREAM_FACTORY
    for (constructor, data) in ((f.from_sequence, 'ab'),
                                (f.from_sequence, [1, 2]),
                                (f.from_sequence, (2, 3)),
                                (f.from_string, 'bc'),
                                (f.from_list, ['c', 6])):
        s = constructor(data)
        assert not s_empty(s)
        (value, n) = s_next(s)
        assert value == data[0:1]
        (value, n) = s_next(n)
        assert value == data[1:2]
        assert s_empty(n)
        (line, n) = s_line(s, False)
        assert line == data
        assert s_empty(n)

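# The test above exercises the contract that every snippet here relies on:
# streams are immutable, and s_next returns the consumed value together with
# a *new* stream, so an old stream can be re-read when backtracking. A
# minimal toy sketch of that contract (an illustration, not LEPL's real
# implementation), with a stream modelled as a (sequence, offset) pair:

def toy_next(stream, count=1):
    (data, offset) = stream
    if offset + count > len(data):
        raise StopIteration  # not enough data left
    return (data[offset:offset + count], (data, offset + count))

def toy_empty(stream):
    (data, offset) = stream
    return offset >= len(data)

stream = ('abc', 0)
assert toy_next(stream) == ('a', ('abc', 1)) == toy_next(stream)  # no mutation
(value, rest) = toy_next(stream, count=2)
assert value == 'ab'
(value, rest) = toy_next(rest)
assert value == 'c' and toy_empty(rest)
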
def _matcher(support, stream1):
    # set default maxdepth
    s_next(stream1, count=0)
    # first match
    generator = matcher._match(stream1)
    try:
        (result2, stream2) = yield generator
        if eos and not s_empty(stream2):
            raise FullFirstMatchException(stream2)
        else:
            yield (result2, stream2)
    except StopIteration:
        raise FullFirstMatchException(stream1)
    # subsequent matches:
    while True:
        result = yield generator
        yield result

def Digit(support, stream):
    digits = {'1': '',     '2': 'abc',  '3': 'def',
              '4': 'ghi',  '5': 'jkl',  '6': 'mno',
              '7': 'pqrs', '8': 'tuv',  '9': 'wxyz',
              '0': ''}
    (digit, next_stream) = s_next(stream)
    yield ([digit], next_stream)
    for letter in digits.get(digit, ''):
        yield ([letter], next_stream)

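# A hedged sketch of what Digit's yields mean: each yield is one alternative
# match, and backtracking simply asks the generator for the next one. This
# drives an inlined copy by hand, with the toy (sequence, offset) streams
# from the sketch above; in LEPL the trampoline does the driving.

def toy_next(stream, count=1):
    (data, offset) = stream
    if offset + count > len(data):
        raise StopIteration
    return (data[offset:offset + count], (data, offset + count))

def toy_digit(support, stream):
    keypad = {'2': 'abc', '3': 'def'}  # abbreviated keypad
    (char, next_stream) = toy_next(stream)
    yield ([char], next_stream)
    for letter in keypad.get(char, ''):
        yield ([letter], next_stream)

assert [result for (result, _) in toy_digit(None, ('2', 0))] == \
    [['2'], ['a'], ['b'], ['c']]
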
def end_group(self, number, offset):
    # copy (for write)
    groups = dict(self.__groups)
    self.__groups = groups
    # we know key is present, so can ignore that
    old_triple = groups[number]
    (_text, start, end) = old_triple
    # remove old value from hash
    if end is not None:
        self.__hash ^= end << 24
    # TODO - maybe this should be postponed
    (_, stream) = s_next(self.__stream, start)
    (text, _) = s_next(stream, offset - start)
    new_triple = (text, start, offset)
    # add new value to hash
    self.__hash ^= offset << 24
    # and store
    groups[number] = new_triple
    if number != 0:
        self.__last_number = number

def test_empty(self):
    f = DEFAULT_STREAM_FACTORY
    for (constructor, data) in ((f.from_sequence, ''),
                                (f.from_sequence, []),
                                (f.from_sequence, ()),
                                (f.from_string, ''),
                                (f.from_list, [])):
        s = constructor(data)
        assert s_empty(s)
        try:
            s_next(s)
            assert False, fmt('expected error: {0}', s)
        except StopIteration:
            pass
        try:
            s_line(s, False)
            assert False, fmt('expected error: {0}', s)
        except StopIteration:
            pass

def _match(self, stream):
    '''
    Do the matching (return a generator that provides successive
    (result, stream) tuples).

    Need to be careful here to use only the restricted functionality
    provided by the stream interface.
    '''
    (value, next_stream) = s_next(stream, count=self.length)
    yield ([self._convert(value)], next_stream)

def match(self, stream_in):
    '''
    Match against the stream.
    '''
    try:
        (terminals, size, _) = self.size_match(stream_in)
        (value, stream_out) = s_next(stream_in, count=size)
        return (terminals, value, stream_out)
    except TypeError:
        # the matcher returned None
        return None

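# The except TypeError above works because size_match returns None when
# nothing matches, and unpacking None raises TypeError; a minimal
# illustration of the idiom (lookup is a hypothetical stand-in):

def lookup(found):
    return (['label'], 3, 'stream') if found else None

try:
    (terminals, size, _) = lookup(False)
    result = (terminals, size)
except TypeError:  # cannot unpack non-iterable None
    result = None
assert result is None
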
def _reset(self, offset, stream, previous):
    self._previous = previous
    self._stream = stream
    self._offset = offset
    self._excess = 0
    try:
        (self._current, self._next_stream) = s_next(stream)
    except StopIteration:
        self._current = None
        self._next_stream = None
        self._excess = 1

def string(self, next, text):
    length = len(text)
    if length == 1:
        if self._current == text[0:1]:
            return True
    else:
        try:
            (advanced, _) = s_next(self._stream, length)
            if advanced == text:
                self._states.append((next, self._start, length))
        except StopIteration:
            pass
    raise Fail

def next(self, state, count=1):
    (cons, line_stream) = state
    try:
        (value, next_line_stream) = s_next(line_stream, count=count)
        return (value, ((cons, next_line_stream), self))
    except StopIteration:
        # the general approach here is to take what we can from the
        # current line, create the next, and take the rest from that.
        # of course, that may also not have enough, in which case it
        # will recurse.
        cons = cons.tail
        if s_empty(line_stream):
            next_line_stream = self._next_line(cons, line_stream)
            next_stream = ((cons, next_line_stream), self)
            return s_next(next_stream, count=count)
        else:
            (line, end_line_stream) = s_line(line_stream, False)
            next_line_stream = self._next_line(cons, end_line_stream)
            next_stream = ((cons, next_line_stream), self)
            (extra, final_stream) = s_next(next_stream,
                                           count=count - len(line))
            value = s_join(line_stream, line, extra)
            return (value, final_stream)

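# A simplified, self-contained sketch of the same "take what the current
# line has, then recurse into the next" strategy, with plain list chunks
# standing in for the cons of line streams (take is hypothetical):

def take(chunks, index, offset, count):
    '''Read count items starting at offset in chunks[index], spilling over.'''
    chunk = chunks[index]
    if offset + count <= len(chunk):
        return (chunk[offset:offset + count], (index, offset + count))
    # take the rest of this chunk, then recurse into the next one
    head = chunk[offset:]
    (tail, position) = take(chunks, index + 1, 0, count - len(head))
    return (head + tail, position)

chunks = ['line 1\n', 'line 2\n']
assert take(chunks, 0, 6, 4) == ('\nlin', (1, 3))  # spans the line boundary
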
def match(support, stream):
    # we increment id so that different strings (which might overlap or
    # be contiguous) don't affect each other's memoisation (the hash key
    # is based on offset, and the end ('one past' the last character) of
    # one column can have the same offset as the start of the next).
    id_ = s_id(stream)
    # extract a line
    (line, next_stream) = s_line(stream, False)
    line_stream = s_stream(stream, line)
    results = []
    for ((left, right), matcher) in zip(indices, matchers):
        id_ += 1
        # extract the location in the line
        (_, left_aligned_line_stream) = s_next(line_stream, count=left)
        (word, _) = s_next(left_aligned_line_stream, count=right - left)
        support._debug(fmt('Columns {0}-{1} {2!r}', left, right, word))
        word_stream = s_stream(left_aligned_line_stream, word, id_=id_)
        # do the match
        support._debug(s_fmt(word_stream, 'matching {rest}'))
        (result, _) = yield matcher._match(word_stream)
        results.extend(result)
    support._debug(repr(results))
    yield (results, next_stream)

def _match(self, stream):
    '''
    On matching we first assert that the token type is correct and
    then delegate to the content.
    '''
    if not self.compiled:
        raise LexerError(
            fmt('A {0} token has not been compiled. '
                'You must use the lexer rewriter with Tokens. '
                'This can be done by using matcher.config.lexer().',
                self.__class__.__name__))
    ((tokens, _), next_stream) = s_next(stream)
    if self.id_ in tokens:
        yield ([], next_stream)

def match(support, stream):
    '''
    Do the matching (return a generator that provides successive
    (result, stream) tuples).

    Need to be careful here to use only the restricted functionality
    provided by the stream interface.
    '''
    try:
        (value, next_stream) = s_next(stream, count=delta)
        if text == value:
            return ([value], next_stream)
    except IndexError:
        pass

def lookahead(self, next, equal, forwards, mutates, reads, length):
    # todo - could also cache things that read groups by state
    # discard old values
    if self._lookaheads[0] != self._offset:
        self._lookaheads = (self._offset, {})
    lookaheads = self._lookaheads[1]
    # approach here different from simple engine as not all
    # results can be cached
    match = False
    if next[1] in lookaheads:
        success = lookaheads[next[1]]
    else:
        # we need to match the lookahead
        search = False
        size = None if (reads and mutates) else \
            length(self._state.groups(self._parser_state.groups))
        if forwards:
            stream = self._initial_stream
            offset = self._offset
        else:
            (text, _) = s_next(self._initial_stream, self._offset)
            stream = s_stream(self._initial_stream, text)
            if size is None:
                offset = 0
                search = True
            else:
                offset = self._offset - size
        if offset >= 0:
            new_state = self._state.clone(next[1], stream=stream)
            self._push()
            try:
                match = self._run_from(new_state, stream, offset, search)
                new_state = self._state
            finally:
                self._pop()
        success = bool(match) == equal
        if not (mutates or reads):
            lookaheads[next[1]] = success
    # if lookahead succeeded, continue
    if success:
        if mutates and match:
            self._state.merge_groups(new_state)
        self._states.append(self._state.advance(next[0]))
    raise Fail

def test_single_value(self):
    f = DEFAULT_STREAM_FACTORY
    for (constructor, data) in ((f.from_sequence, 'a'),
                                (f.from_sequence, [1]),
                                (f.from_sequence, (2,)),
                                (f.from_string, 'b'),
                                (f.from_list, ['c'])):
        s = constructor(data)
        assert not s_empty(s)
        (value, n) = s_next(s)
        assert value == data
        assert s_empty(n)
        (line, n) = s_line(s, False)
        assert line == data
        assert s_empty(n)

def _match(self, stream):
    '''
    On matching we first assert that the token type is correct and
    then delegate to the content.
    '''
    if not self.compiled:
        raise LexerError(
            fmt('A {0} token has not been compiled. '
                'You must use the lexer rewriter with Tokens. '
                'This can be done by using matcher.config.lexer().',
                self.__class__.__name__))
    ((tokens, line_stream), next_stream) = s_next(stream)
    if self.id_ in tokens:
        if self.content is None:
            # result contains all data (use s_next not s_line to set max)
            (line, _) = s_line(line_stream, True)
            (line, _) = s_next(line_stream, count=len(line))
            yield ([line], next_stream)
        else:
            generator = self.content._match(line_stream)
            while True:
                (result, next_line_stream) = yield generator
                if s_empty(next_line_stream) or not self.complete:
                    yield (result, next_stream)

def test_string_lines(self):
    f = DEFAULT_STREAM_FACTORY
    s = f.from_string('line 1\nline 2\nline 3\n')
    (l, s) = s_line(s, False)
    assert l == 'line 1\n', l
    (l, _) = s_line(s, False)
    assert l == 'line 2\n', repr(l)
    locn = s_fmt(s, '{location}')
    assert locn == 'line 2, character 1', locn
    sl = s_stream(s, l)
    (_, sl) = s_next(sl, count=2)
    locn = s_fmt(sl, '{location}')
    assert locn == 'line 2, character 3', locn

def _tokens(self, stream, max):
    '''
    Generate tokens, on demand.
    '''
    id_ = s_id(stream)
    try:
        while not s_empty(stream):
            # caches for different tokens with same contents differ
            id_ += 1
            (line, next_stream) = s_line(stream, False)
            line_stream = s_stream(stream, line)
            size = 0
            # if we use blocks, match leading space
            if self.blocks:
                try:
                    (_, size, _) = self.s_regexp.size_match(line_stream)
                except TypeError:
                    pass
            # this will be empty (size=0) if blocks unused
            (indent, next_line_stream) = s_next(line_stream, count=size)
            indent = indent.replace('\t', self._tab)
            yield ((START,),
                   s_stream(line_stream, indent, id_=id_, max=max))
            line_stream = next_line_stream
            while not s_empty(line_stream):
                id_ += 1
                try:
                    (terminals, match, next_line_stream) = \
                        self.t_regexp.match(line_stream)
                    yield (terminals,
                           s_stream(line_stream, match, max=max, id_=id_))
                except TypeError:
                    (terminals, _size, next_line_stream) = \
                        self.s_regexp.size_match(line_stream)
                line_stream = next_line_stream
            id_ += 1
            yield ((END,), s_stream(line_stream, '', max=max, id_=id_))
            stream = next_stream
    except TypeError:
        raise RuntimeLexerError(
            s_fmt(stream, 'No token for {rest} at {location} of {text}.'))

def match(support, stream):
    '''
    Do the matching. The result will be a single matching character.
    '''
    (value, next_stream) = s_next(stream)
    if restrict:
        try:
            if value not in restrict:
                raise StopIteration
        except TypeError:
            # it would be nice to make this an error, but for line aware
            # parsing (and any other heterogeneous input) it's legal
            if not warned[0]:
                support._warn(fmt('Cannot restrict {0} with {1!r}',
                                  value, restrict))
                warned[0] = True
            raise StopIteration
    return ([value], next_stream)

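# Why the except TypeError above can fire: testing membership of a
# non-character value (here a token tuple, as in line-aware parsing) in a
# string restrict raises TypeError rather than returning False:

try:
    result = ('token',) in 'abcdef'
except TypeError:  # "'in <string>' requires string as left operand"
    result = None  # heterogeneous input: warn once and give up restricting
assert result is None
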
def lookahead(self, next, equal, forwards, mutates, reads, length):
    # discard old values
    if self._lookaheads[0] != self._offset:
        self._lookaheads = (self._offset, {})
    lookaheads = self._lookaheads[1]
    if next[1] not in lookaheads:
        # requires complex engine
        if reads:
            raise UnsupportedOperation('lookahead')
        size = None if (reads and mutates) else length(None)
        # invoke simple engine and cache
        self._push()
        try:
            if forwards:
                stream = self._initial_stream
                pos = self._offset
                search = False
            else:
                (text, _) = s_next(self._initial_stream, self._offset)
                stream = s_stream(self._initial_stream, text)
                if size is None:
                    pos = 0
                    search = True
                else:
                    pos = self._offset - size
                    search = False
            if pos >= 0:
                result = bool(self._run_from(next[1], stream,
                                             pos, search)) == equal
            else:
                result = not equal
        finally:
            self._pop()
        lookaheads[next[1]] = result
    if lookaheads[next[1]]:
        return next[0]
    else:
        raise Fail

def clone(self, offset=None, groups=None):
    '''
    Duplicate this state.  If offset is specified, it must be greater
    than or equal to the existing offset; the text and offset of the
    clone will then be consistent with the new value.  If groups is
    given, it replaces the previous groups.
    '''
    if groups is None:
        groups = self.__groups.clone()
    previous = self._previous
    if offset is None or offset == self._offset:
        offset = self._offset
        stream = self._stream
    else:
        delta = offset - self._offset
        (advanced, stream) = s_next(self._stream, delta)
        previous = advanced[-1:]
    checkpoints = set(self.__checkpoints) if self.__checkpoints else None
    return State(self._parser_state, stream, groups, previous=previous,
                 offset=offset, loops=self.__loops.clone(),
                 checkpoints=checkpoints)

def lookahead(self, next, equal, forwards, mutates, reads, length):
    self.ticks += 1
    alternate = next[1]
    if alternate not in self.__lookaheads:
        self.__lookaheads[alternate] = {}
    if self.__state._offset in self.__lookaheads[alternate]:
        success = self.__lookaheads[alternate][self.__state._offset]
    else:
        size = None if (reads and mutates) \
            else length(self.__state.groups)
        search = False
        if forwards:
            clone = State(self.__state._parser_state,
                          self.__state._stream,
                          self.__state.groups.clone())
        else:
            if size is not None and size > self.__state._offset and equal:
                raise Fail
            (text, _) = s_next(self.__stream, self.__state._offset)
            stream = s_stream(self.__stream, text)
            if size is None or size > self.__state._offset:
                search = True
                pos = None
            else:
                pos = self.__state._offset - size
            clone = State(self.__state._parser_state, stream,
                          self.__state.groups.clone(), pos=pos)
        (match, clone) = self.__run(alternate, clone, search=search)
        success = match == equal
        if not (reads or mutates):
            self.__lookaheads[alternate][self.__state._offset] = success
    # if lookahead succeeded, continue
    if success:
        if mutates:
            self.__state = self.__state.clone(groups=clone.groups)
        return next[0]
    else:
        raise Fail

def match(self, stream):
    '''
    Use the table to match a stream.

    The stack holds the current state, which is consumed from
    left to right.  An entry on the stack contains:
    - map_ - a map from character to [(dest state, terminals)]
    - matched - the [(dest state, terminals)] generated by the map for
      a given character
    - empties - empty transitions for this state
    - match - the current match, as a list of tokens consumed from the
      stream
    - stream - the current stream
    '''
    #self._debug(str(self.__table))
    stack = deque()
    (map_, empties) = self.__table[0]
    stack.append((map_, None, empties, [], stream))
    while stack:
        #self._debug(str(stack))
        (map_, matched, empties, match, stream) = stack.pop()
        if not map_ and not matched and not empties:
            # if we have no more transitions, drop
            pass
        elif map_:
            # re-add empties with old match
            stack.append((None, None, empties, match, stream))
            # and try matching a character
            if not s_empty(stream):
                (value, next_stream) = s_next(stream)
                try:
                    matched = map_[value]
                    if matched:
                        stack.append((None, matched, None,
                                      match + [value], next_stream))
                except IndexError:
                    pass
        elif matched:
            (dest, terminal) = matched[-1]
            # add back reduced matched
            if len(matched) > 1:  # avoid discard iteration
                stack.append((map_, matched[:-1], empties, match, stream))
            # and expand this destination
            (map_, empties) = self.__table[dest]
            stack.append((map_, None, empties, match, stream))
            if terminal:
                yield (terminal, self.__alphabet.join(match), stream)
        else:
            # we must have an empty transition
            (dest, terminal) = empties[-1]
            # add back reduced empties
            if len(empties) > 1:  # avoid discard iteration
                stack.append((map_, matched, empties[:-1], match, stream))
            # and expand this destination
            (map_, empties) = self.__table[dest]
            stack.append((map_, None, empties, match, stream))
            if terminal:
                yield (terminal, self.__alphabet.join(match), stream)

def match(support, stream):
    (char, next_stream) = s_next(stream)
    if char in chars:
        return ([char], next_stream)

def any_char(support, stream):
    while True:
        (char, stream) = s_next(stream)
        yield ([char], stream)

def match(support, stream):
    while True:
        (char, stream) = s_next(stream)
        if char in chars:
            yield ([char], stream)

def capital(support, stream):
    (char, next_stream) = s_next(stream)
    if char in ascii_uppercase:
        return ([char], next_stream)

def _match(self, stream):
    (value, next_stream) = s_next(stream)
    for i in range(value[0]):
        yield ([i], next_stream)

def char(support, stream):
    (char, stream) = s_next(stream)
    return ([char], stream)

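# A final hedged sketch: driving a generator matcher like any_char above,
# again with toy (sequence, offset) streams. Note that raising StopIteration
# inside a generator, as these snippets do on end of input, predates
# PEP 479; on Python 3.7+ it surfaces as RuntimeError instead.

def toy_next(stream, count=1):
    (data, offset) = stream
    if offset + count > len(data):
        raise StopIteration
    return (data[offset:offset + count], (data, offset + count))

def toy_any_char(support, stream):
    while True:
        (char, stream) = toy_next(stream)
        yield ([char], stream)

gen = toy_any_char(None, ('ab', 0))
assert next(gen) == (['a'], ('ab', 1))
assert next(gen) == (['b'], ('ab', 2))
try:
    next(gen)  # exhausted: toy_next raises inside the generator
    assert False, 'expected the generator to fail'
except (RuntimeError, StopIteration):  # RuntimeError under PEP 479
    pass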