Exemple #1
0
 def match(support, stream):
     '''
     Apply `pattern` (from the enclosing scope) to the line obtained from
     `s_line(stream, True)`.

     On success the result is the list of captured groups (or the whole
     matched text when the pattern defines no groups) paired with the
     stream advanced by the length of the match.  When the pattern does
     not match, None is returned.
     '''
     (line, _) = s_line(stream, True)
     found = pattern.match(line)
     if not found:
         return None
     consumed = len(found.group())
     captured = found.groups()
     # fall back to the complete match when there are no capture groups
     values = list(captured) if captured else [found.group()]
     return (values, s_next(stream, count=consumed)[1])
Exemple #2
0
 def end_group(self, number, offset):
     '''
     Close the group `number`, which must already have a recorded start
     offset, at `offset`.

     The text between the start and `offset` is read from the stream and
     stored, with both offsets, in the groups table; the start offset is
     then forgotten.  Any group other than 0 becomes the "last index".
     '''
     assert isinstance(number, int)
     assert number in self.__offsets, 'Unopened group: ' + str(number)
     # invalidate the cached string form
     self.__str = None
     start = self.__offsets[number]
     # skip to the start of the group, then read its contents
     (_, from_start) = s_next(self.__stream, start)
     (text, _) = s_next(from_start, offset - start)
     self.__groups[number] = (text, start, offset)
     del self.__offsets[number]
     if number: # avoid group 0
         self.__last_index = number
Exemple #3
0
 def _final(self):
     '''Current character is last?'''
     if not self._excess:
         try:
             (_, stream) = s_next(self._stream)
             try:
                 s_next(stream)
             except StopIteration:
                 return True
         except StopIteration:
             pass
     return False
Exemple #4
0
    def _advance(self, delta=1):
        '''
        Move forwards in the stream by `delta` items (which must be
        non-negative) and return True.

        I've tried to optimise for the common (delta=1) case.

        The following conventions are followed:
        - `offset` is the offset from the initial input
        - `stream` is the stream starting at the current location
        - `next_stream` is the stream after current
        - `current` is the character at the current location
        - `previous` is the character just before the current location
        - `excess` is the amount by which we advanced past the end

        If `excess` is set, streams should not be used.

        A `delta` of zero changes nothing (the offset is incremented by 0
        and no branch below is taken).
        '''
        assert delta >= 0
        self._offset += delta
        if self._excess:
            # already past the end: just move further past it
            self._excess += delta
            self._previous = None
        elif delta == 1:
            # fast path: shift the cached "next" values into "current"
            self._stream = self._next_stream
            self._previous = self._current
            try:
                (self._current, self._next_stream) = s_next(self._next_stream)
            except StopIteration:
                # now exactly one past the last item
                self._current = None
                self._next_stream = None
                self._excess = 1
        elif delta:
            old_stream = self._stream
            try:
                # read all skipped items in one call; the last of them is
                # the new "previous"
                (advanced, self._stream) = s_next(old_stream, delta)
                self._previous = advanced[-1:]
                try:
                    (self._current, self._next_stream) = s_next(self._stream)
                except StopIteration:
                    # landed exactly at the end of the stream
                    self._current = None
                    self._next_stream = None
                    self._excess = 1
            except StopIteration:
                # fewer than `delta` items remained; record the overshoot
                # relative to the length of the stream we started from
                self._stream = None
                self._next_stream = None
                self._current = None
                self._previous = None
                self._excess = delta - s_len(old_stream) + 1
        return True
Exemple #5
0
 def _match(self, stream_in):
     '''
     Pull indent and call the policy and update the global value,
     then evaluate the contents.

     This is a generator.  It repeatedly yields the generator for the
     contents (`And(*self.lines)`), receives that generator's result
     at the same `yield`, and then yields the result onwards.  While the
     contents are being evaluated the policy value is pushed onto the
     monitor; it is popped again as soon as a result (or an exception)
     comes back.

     The generator simply finishes, producing nothing, when:
     - the same stream (by `s_key`) is already being matched by this
       instance (guards against left recursion);
     - the first token does not include START;
     - StopIteration is raised while reading the stream or is thrown in
       at the inner `yield`.

     StopIteration is converted into a plain `return` because, under
     PEP 479 (the default from Python 3.7), a StopIteration that escapes
     a generator body is turned into RuntimeError.  On earlier versions
     the two are equivalent, so callers see no difference.
     '''
     # detect a nested call
     key = s_key(stream_in)
     if key in self.__streams:
         self._debug('Avoided left recursive call to Block.')
         return
     self.__streams.add(key)
     try:
         ((tokens, token_stream), _) = s_next(stream_in)
         (indent, _) = s_line(token_stream, True)
         if START not in tokens:
             # not the start of a line: no match
             return
         current = self.__monitor.indent
         policy = self.policy(current, indent)

         generator = And(*self.lines)._match(stream_in)
         while True:
             self.__monitor.push_level(policy)
             try:
                 results = yield generator
             finally:
                 # always restore the level, even if the caller throws
                 self.__monitor.pop_level()
             yield results
     except StopIteration:
         # end the generator cleanly (see docstring re PEP 479)
         return
     finally:
         self.__streams.remove(key)
Exemple #6
0
 def size_match(self, stream):
     '''
     Run the transition table over the current line and report the longest
     match as (terminals, length, stream after the match).

     If the table has labels for the empty match, that is the initial
     answer; otherwise the result is None when nothing matches.
     '''
     if self.__empty_labels:
         best = (self.__empty_labels, 0, stream)
     else:
         best = None
     (line, _) = s_line(stream, True)
     state = 0
     consumed = 0
     total = len(line)
     while consumed < total:
         transition = self.__table[state][line[consumed]]
         if transition is None:
             break
         # move to the new state
         (state, terminals) = transition
         consumed += 1
         if not terminals:
             continue
         # matches only get longer, so remembering the length is enough
         # (no need to make an expensive copy)
         try:
             (_, remainder) = s_next(stream, count=consumed)
         except StopIteration:
             continue
         best = (terminals, consumed, remainder)
     return best
Exemple #7
0
 def test_two_values(self):
     '''
     A stream built from two items yields them one at a time via s_next,
     and both together via s_line, leaving an empty stream either way.
     '''
     factory = DEFAULT_STREAM_FACTORY
     cases = [(factory.from_sequence, 'ab'),
              (factory.from_sequence, [1, 2]),
              (factory.from_sequence, (2,3)),
              (factory.from_string, 'bc'),
              (factory.from_list, ['c', 6])]
     for (build, data) in cases:
         stream = build(data)
         assert not s_empty(stream)
         # item by item
         (first, after_first) = s_next(stream)
         assert first == data[0:1]
         (second, after_second) = s_next(after_first)
         assert second == data[1:2]
         assert s_empty(after_second)
         # whole line, from the original stream
         (line, after_line) = s_line(stream, False)
         assert line == data
         assert s_empty(after_line)
Exemple #8
0
 def _matcher(support, stream1):
     '''
     Generator that wraps `matcher` (from the enclosing scope) so that a
     failing first match raises FullFirstMatchException instead of simply
     ending.

     Each match is requested by yielding the wrapped matcher's generator;
     the (result, stream) pair received back at that `yield` is then
     yielded onwards.  For the first match only:
     - if StopIteration arrives instead of a result, the exception is
       raised with the original stream;
     - if `eos` (from the enclosing scope) is set and the resulting stream
       is not empty, the exception is raised with that stream.
     Subsequent matches are passed through unchanged.
     '''
     # set default maxdepth
     s_next(stream1, count=0)
     # first match
     generator = matcher._match(stream1)
     try:
         (result2, stream2) = yield generator
         if eos and not s_empty(stream2):
             raise FullFirstMatchException(stream2)
         else:
             yield (result2, stream2)
     except StopIteration:
         raise FullFirstMatchException(stream1)
     
     # subsequent matches:
     while True:
         result = yield generator
         yield result
Exemple #9
0
def Digit(support, stream):
    '''
    Read one item from the stream and generate the alternatives for it:
    first the item itself, then each letter that shares its key on a
    telephone keypad (nothing extra for '0', '1' or unknown items).
    Every alternative is paired with the same following stream.
    '''
    keypad = {
        '1': '',     '2': 'abc',  '3': 'def',
        '4': 'ghi',  '5': 'jkl',  '6': 'mno',
        '7': 'pqrs', '8': 'tuv',  '9': 'wxyz',
        '0': '',
    }
    (digit, next_stream) = s_next(stream)
    # the digit itself is always the first alternative
    yield ([digit], next_stream)
    letters = keypad.get(digit, '')
    for letter in letters:
        yield ([letter], next_stream)
Exemple #10
0
 def end_group(self, number, offset):
     '''
     Record `offset` as the end of group `number`, re-reading the group's
     text from the stream.

     The groups table is replaced by a modified copy rather than changed
     in place, and the running hash is updated by removing the old end
     offset (if there was one) and mixing in the new one.  Any group other
     than 0 becomes the "last number".
     '''
     # work on a copy so that the previous dict is left untouched
     updated = dict(self.__groups)
     self.__groups = updated
     # the key is known to be present
     (_text, start, previous_end) = updated[number]
     if previous_end is not None:
         # cancel the contribution of the old end offset
         self.__hash ^= previous_end << 24
     # TODO - maybe this should be postponed
     (_, from_start) = s_next(self.__stream, start)
     (text, _) = s_next(from_start, offset - start)
     # mix in the new end offset
     self.__hash ^= offset << 24
     updated[number] = (text, start, offset)
     if number != 0:
         self.__last_number = number
Exemple #11
0
 def test_empty(self):
     '''
     An empty stream reports itself as empty, and both s_next and s_line
     raise StopIteration when asked for data.
     '''
     factory = DEFAULT_STREAM_FACTORY
     cases = [(factory.from_sequence, ''),
              (factory.from_sequence, []),
              (factory.from_sequence, ()),
              (factory.from_string, ''),
              (factory.from_list, [])]
     for (build, data) in cases:
         s = build(data)
         assert s_empty(s)
         try:
             s_next(s)
         except StopIteration:
             pass
         else:
             assert False, fmt('expected error: {0}', s)
         try:
             s_line(s, False)
         except StopIteration:
             pass
         else:
             assert False, fmt('expected error: {0}', s)
Exemple #12
0
     def _match(self, stream):
         '''
         Generate a single (result, stream) pair: `self.length` items are
         read from the stream, passed through `self._convert`, and
         returned as a one-element result list with the following stream.

         Only the restricted functionality provided by the stream
         interface is used here.
         '''
         (raw, remainder) = s_next(stream, count=self.length)
         converted = self._convert(raw)
         yield ([converted], remainder)
Exemple #13
0
 def match(self, stream_in):
     '''
     Match against the stream, returning (terminals, matched value,
     following stream), or None when `size_match` finds nothing.
     '''
     try:
         (terminals, size, _) = self.size_match(stream_in)
         (value, stream_out) = s_next(stream_in, count=size)
     except TypeError:
         # size_match returned None, which cannot be unpacked
         return None
     return (terminals, value, stream_out)
Exemple #14
0
 def _reset(self, offset, stream, previous):
     '''
     Reposition at `offset`, with `stream` starting at that location and
     `previous` as the item just before it.

     The current item and following stream are read from `stream`; if it
     is already exhausted they are cleared and the excess is set to 1.
     '''
     self._offset = offset
     self._stream = stream
     self._previous = previous
     self._excess = 0
     try:
         head = s_next(stream)
     except StopIteration:
         # nothing left: we are one past the end
         head = (None, None)
         self._excess = 1
     (self._current, self._next_stream) = head
Exemple #15
0
 def string(self, next, text):
     '''
     Match the literal `text` at the current location.

     A single item is compared directly with the current item and True is
     returned on success.  For longer text, a successful comparison queues
     a new state (`next`, the start, and the length consumed).  In every
     case other than the single-item success, Fail is raised.
     '''
     length = len(text)
     if length == 1:
         if self._current == text[0:1]:
             return True
     else:
         try:
             (advanced, _) = s_next(self._stream, length)
         except StopIteration:
             # not enough input left
             pass
         else:
             if advanced == text:
                 self._states.append((next, self._start, length))
     raise Fail
Exemple #16
0
 def next(self, state, count=1):
     '''
     Read `count` items from a stream whose state is a pair of
     (cons, line_stream), returning (value, new_stream).

     The read is first attempted on the current line stream alone.  If
     that raises StopIteration the read is continued on the line obtained
     from `self._next_line` with the tail of `cons`, joining the partial
     results with `s_join`.
     '''
     (cons, line_stream) = state
     try:
         (value, next_line_stream) = s_next(line_stream, count=count)
         return (value, ((cons, next_line_stream), self))
     except StopIteration:
         # the general approach here is to take what we can from the
         # current line, create the next, and take the rest from that.
         # of course, that may also not have enough, in which case it
         # will recurse.
         cons = cons.tail
         if s_empty(line_stream):
             # nothing left on this line: read everything from the next
             next_line_stream = self._next_line(cons, line_stream)
             next_stream = ((cons, next_line_stream), self)
             return s_next(next_stream, count=count)
         else:
             # take the remainder of this line, then the shortfall
             # (count minus what we already have) from the next
             (line, end_line_stream) = s_line(line_stream, False)
             next_line_stream = self._next_line(cons, end_line_stream)
             next_stream = ((cons, next_line_stream), self)
             (extra, final_stream) = s_next(next_stream, count=count-len(line))
             value = s_join(line_stream, line, extra)
             return (value, final_stream)
Exemple #17
0
 def match(support, stream):
     '''
     Match fixed columns of a single line.

     `indices` and `matchers` come from the enclosing scope: each
     (left, right) pair selects the items of the line from `left` up to
     `right`, and the corresponding matcher is applied to a stream
     containing just those items.  This is a generator: each matcher's
     generator is yielded and its (result, stream) pair is received back
     at the same `yield`.  The results are concatenated in order and
     finally yielded together with the stream that follows the line.
     '''
     # we increment id so that different strings (which might overlap or
     # be contiguous) don't affect each other's memoisation (the hash key
     # is based on offset and ('one past the') end of one column can have
     # the same offset as the start of the next).
     id_ = s_id(stream)
     # extract a line
     (line, next_stream) = s_line(stream, False)
     line_stream = s_stream(stream, line)
     results = []
     for ((left, right), matcher) in zip(indices, matchers):
         id_ += 1
         # extract the location in the line
         (_, left_aligned_line_stream) = s_next(line_stream, count=left)
         (word, _) = s_next(left_aligned_line_stream, count=right-left)
         support._debug(fmt('Columns {0}-{1} {2!r}', left, right, word))
         word_stream = s_stream(left_aligned_line_stream, word, id_=id_)
         # do the match
         support._debug(s_fmt(word_stream, 'matching {rest}'))
         # the stream left over after the column match is discarded
         (result, _) = yield matcher._match(word_stream)
         results.extend(result)
     support._debug(repr(results))
     yield (results, next_stream)
Exemple #18
0
 def _match(self, stream):
     '''
     Check that the next item in the token stream carries this token's
     id and, if so, generate an empty result with the following stream.

     Raises LexerError if the token has not been compiled.
     '''
     if not self.compiled:
         message = fmt('A {0} token has not been compiled. '
                       'You must use the lexer rewriter with Tokens. '
                       'This can be done by using matcher.config.lexer().',
                       self.__class__.__name__)
         raise LexerError(message)
     (head, next_stream) = s_next(stream)
     # each item is a pair; only the token ids are needed here
     (tokens, _) = head
     if self.id_ not in tokens:
         return
     yield ([], next_stream)
Exemple #19
0
    def match(support, stream):
        '''
        Compare the next `delta` items of the stream with `text` (both
        names come from the enclosing scope).

        Returns ([value], next_stream) when they are equal.  Returns None
        when they differ, or when reading from the stream raises
        IndexError.

        Need to be careful here to use only the restricted functionality
        provided by the stream interface.
        '''
        try:
            (value, next_stream) = s_next(stream, count=delta)
        except IndexError:
            return None
        if text == value:
            return ([value], next_stream)
        return None
Exemple #20
0
    def lookahead(self, next, equal, forwards, mutates, reads, length):
        '''
        Evaluate a lookahead (`forwards` true) or lookbehind (`forwards`
        false) whose sub-expression starts at `next[1]`.

        The lookahead succeeds when the truth of the sub-match equals
        `equal`.  On success the current state, advanced to `next[0]`, is
        appended to `self._states` (after merging in groups from the
        sub-match when `mutates` is set and there was a match).  Fail is
        always raised at the end, whatever the outcome.

        Results are cached per offset, keyed by `next[1]`, but only for
        lookaheads that neither read nor mutate groups.
        '''
        # todo - could also cache things that read groups by state
        
        # discard old values
        if self._lookaheads[0] != self._offset:
            self._lookaheads = (self._offset, {})
        lookaheads = self._lookaheads[1]

        # approach here different from simple engine as not all
        # results can be cached
        match = False
        if next[1] in lookaheads:
            success = lookaheads[next[1]]
        else:
            # we need to match the lookahead
            search = False
            # size is unknown (None) when the expression both reads and
            # mutates groups; otherwise `length` computes it from groups
            size = None if (reads and mutates) else \
                length(self._state.groups(self._parser_state.groups))
            if forwards:
                stream = self._initial_stream
                offset = self._offset
            else:
                # lookbehind: restrict the stream to the text before the
                # current offset
                (text, _) = s_next(self._initial_stream, self._offset)
                stream = s_stream(self._initial_stream, text)
                if size is None:
                    # unknown size: search from the start
                    offset = 0
                    search = True
                else:
                    offset = self._offset - size

            # a negative offset means there is not enough preceding text,
            # so `match` stays False
            if offset >= 0:
                new_state = self._state.clone(next[1], stream=stream)
                self._push()
                try:
                    match = self._run_from(new_state, stream, offset, search)
                    new_state = self._state
                finally:
                    self._pop()

            success = bool(match) == equal
            if not (mutates or reads):
                lookaheads[next[1]] = success

        # if lookahead succeeded, continue
        if success:
            if mutates and match:
                self._state.merge_groups(new_state)
            self._states.append(self._state.advance(next[0]))
        raise Fail
Exemple #21
0
 def test_single_value(self):
     '''
     A stream built from a single item yields exactly that item, via
     either s_next or s_line, and is then empty.
     '''
     factory = DEFAULT_STREAM_FACTORY
     cases = [(factory.from_sequence, 'a'),
              (factory.from_sequence, [1]),
              (factory.from_sequence, (2,)),
              (factory.from_string, 'b'),
              (factory.from_list, ['c'])]
     for (build, data) in cases:
         stream = build(data)
         assert not s_empty(stream)
         (value, after_value) = s_next(stream)
         assert value == data
         assert s_empty(after_value)
         (line, after_line) = s_line(stream, False)
         assert line == data
         assert s_empty(after_line)
Exemple #22
0
 def _match(self, stream):
     '''
     On matching we first assert that the token type is correct and then
     delegate to the content.

     This is a generator.  Nothing is generated unless the next item in
     the token stream carries this token's id.  Without a content matcher
     the whole text of the token is the single result.  With a content
     matcher, that matcher's generator is yielded repeatedly and each
     result received back is passed on only if it consumed the whole
     token or `self.complete` is false.  In every case the stream paired
     with a result is the token stream after this token.

     Raises LexerError if the token has not been compiled.
     '''
     if not self.compiled:
         raise LexerError(
             fmt('A {0} token has not been compiled. '
                    'You must use the lexer rewriter with Tokens. '
                    'This can be done by using matcher.config.lexer().',
                    self.__class__.__name__))
     ((tokens, line_stream), next_stream) = s_next(stream)
     if self.id_ in tokens:
         if self.content is None:
             # result contains all data (use s_next not s_line to set max)
             (line, _) = s_line(line_stream, True)
             (line, _) = s_next(line_stream, count=len(line))
             yield ([line], next_stream)
         else:
             generator = self.content._match(line_stream)
             while True:
                 (result, next_line_stream) = yield generator
                 # partial matches of the token text are silently skipped
                 # when `complete` is set
                 if s_empty(next_line_stream) or not self.complete:
                     yield (result, next_stream)
Exemple #23
0
 def test_string_lines(self):
     '''
     Lines are read one at a time from a string stream, and the reported
     location follows both the line and the position within it.
     '''
     factory = DEFAULT_STREAM_FACTORY
     stream = factory.from_string('line 1\nline 2\nline 3\n')
     (first, stream) = s_line(stream, False)
     assert first == 'line 1\n', first
     # read the second line without advancing `stream`
     (second, _) = s_line(stream, False)
     assert second == 'line 2\n', repr(second)
     location = s_fmt(stream, '{location}')
     assert location == 'line 2, character 1', location
     # a stream over just the second line, advanced by two items
     line_stream = s_stream(stream, second)
     (_, line_stream) = s_next(line_stream, count=2)
     location = s_fmt(line_stream, '{location}')
     assert location == 'line 2, character 3', location
     
     
     
Exemple #24
0
 def _tokens(self, stream, max):
     '''
     Generate tokens, on demand.

     For each line of `stream` this yields, in order:
     - a (START,) token whose stream holds the leading indent (empty
       unless `self.blocks` is set), with tabs replaced by `self._tab`;
     - one (terminals, stream) token for each match of `self.t_regexp`;
       text that matches `self.s_regexp` instead is skipped silently;
     - an (END,) token with an empty stream.

     `max` is passed through to every token stream created (note that
     the name shadows the builtin inside this method).

     Raises RuntimeLexerError when neither regexp matches at some point
     in a line.
     '''
     id_ = s_id(stream)
     try:
         while not s_empty(stream):
             
             # caches for different tokens with same contents differ
             id_ += 1
             (line, next_stream) = s_line(stream, False)
             line_stream = s_stream(stream, line)
             size = 0
             # if we use blocks, match leading space
             if self.blocks:
                 try:
                     (_, size, _) = self.s_regexp.size_match(line_stream)
                 except TypeError:
                     # no leading space (size_match returned None)
                     pass
             # this will be empty (size=0) if blocks unused 
             (indent, next_line_stream) = s_next(line_stream, count=size)
             indent = indent.replace('\t', self._tab)
             yield ((START,), 
                    s_stream(line_stream, indent, id_=id_, max=max))
             line_stream = next_line_stream
             
             while not s_empty(line_stream):
                 id_ += 1
                 try:
                     (terminals, match, next_line_stream) = \
                                     self.t_regexp.match(line_stream)
                     yield (terminals, s_stream(line_stream, match, 
                                                max=max, id_=id_))
                 except TypeError:
                     # no token here, so try to skip it as space; if that
                     # also returns None the unpacking raises TypeError
                     # again, which is handled by the outer handler below
                     (terminals, _size, next_line_stream) = \
                                 self.s_regexp.size_match(line_stream)
                 line_stream = next_line_stream
                 
             id_ += 1
             yield ((END,), 
                    s_stream(line_stream, '', max=max, id_=id_))
             stream = next_stream
             
     except TypeError:
         # NOTE(review): the location reported comes from `stream`, which
         # is still the start of the current line at this point
         raise RuntimeLexerError(
             s_fmt(stream, 
                   'No token for {rest} at {location} of {text}.'))
Exemple #25
0
 def match(support, stream):
     '''
     Do the matching.  The result will be a single matching character.

     `restrict` and `warned` come from the enclosing scope.  When
     `restrict` is set, a value that is not in it is rejected by raising
     StopIteration.  If the membership test itself raises TypeError a
     warning is issued once (tracked by `warned[0]`).
     '''
     (value, next_stream) = s_next(stream)
     if restrict:
         try:
             if value not in restrict:
                 raise StopIteration
         except TypeError:
             # it would be nice to make this an error, but for line aware
             # parsing (and any other heterogeneous input) it's legal
             if not warned[0]:
                 support._warn(fmt('Cannot restrict {0} with {1!r}',
                                       value, restrict))
                 warned[0] = True
                 # NOTE(review): this raise is inside the `if`, so only
                 # the first uncheckable value is rejected; later ones
                 # fall through and are returned as matches.  Confirm
                 # whether the raise was meant to be unconditional.
                 raise StopIteration
     return ([value], next_stream)
Exemple #26
0
    def lookahead(self, next, equal, forwards, mutates, reads, length):
        '''
        Evaluate a lookahead (`forwards` true) or lookbehind (`forwards`
        false) whose sub-expression starts at `next[1]`, caching the
        outcome for the current offset.

        Returns `next[0]` when the truth of the sub-match equals `equal`;
        otherwise raises Fail.  Raises UnsupportedOperation when the
        lookahead reads groups, since that is not handled here.
        '''

        # discard old values
        if self._lookaheads[0] != self._offset:
            self._lookaheads = (self._offset, {})
        lookaheads = self._lookaheads[1]

        if next[1] not in lookaheads:

            # requires complex engine
            if reads:
                raise UnsupportedOperation('lookahead')
            # `reads` is always false by this point, so this always
            # evaluates length(None)
            size = None if (reads and mutates) else length(None)

            # invoke simple engine and cache
            self._push()
            try:
                if forwards:
                    stream = self._initial_stream
                    pos = self._offset
                    search = False
                else:
                    # lookbehind: restrict the stream to the text before
                    # the current offset
                    (text, _) = s_next(self._initial_stream, self._offset)
                    stream = s_stream(self._initial_stream, text)
                    if size is None:
                        # unknown size: search from the start
                        pos = 0
                        search = True
                    else:
                        pos = self._offset - size
                        search = False
                if pos >= 0:
                    result = bool(self._run_from(next[1], stream, pos, search)) == equal
                else:
                    # not enough preceding text: treated as "no match"
                    result = not equal
            finally:
                self._pop()
            lookaheads[next[1]] = result

        if lookaheads[next[1]]:
            return next[0]
        else:
            raise Fail
Exemple #27
0
 def clone(self, offset=None, groups=None):
     '''
     Build a copy of this state.

     `offset`, when given, must not be less than the current offset; the
     copy's stream, offset and previous item are then moved forwards to
     match.  `groups`, when given, is used in place of a clone of the
     existing groups.  Loops are always cloned and checkpoints (if any)
     are copied into a new set.
     '''
     new_groups = self.__groups.clone() if groups is None else groups
     if offset is None or offset == self._offset:
         # same position: reuse the existing values
         new_offset = self._offset
         new_stream = self._stream
         previous = self._previous
     else:
         # step forwards; the last item skipped becomes "previous"
         (advanced, new_stream) = s_next(self._stream, offset - self._offset)
         previous = advanced[-1:]
         new_offset = offset
     if self.__checkpoints:
         checkpoints = set(self.__checkpoints)
     else:
         checkpoints = None
     return State(self._parser_state, new_stream, new_groups,
                  previous=previous, offset=new_offset,
                  loops=self.__loops.clone(), checkpoints=checkpoints)
Exemple #28
0
 def lookahead(self, next, equal, forwards, mutates, reads, length):
     '''
     Evaluate a lookahead (`forwards` true) or lookbehind (`forwards`
     false) whose sub-expression starts at `next[1]`.

     Returns `next[0]` when the outcome of the sub-match equals `equal`
     (first replacing the current state with a clone carrying the
     sub-match's groups when `mutates` is set); otherwise raises Fail.

     Outcomes are cached per sub-expression and per offset, but only for
     lookaheads that neither read nor mutate groups.  A cached outcome is
     read with the same two-level indexing used to store it,
     `self.__lookaheads[alternate][offset]`.
     '''
     self.ticks += 1
     alternate = next[1]
     if alternate not in self.__lookaheads:
         self.__lookaheads[alternate] = {}
     if self.__state._offset in self.__lookaheads[alternate]:
         # cache hit: index the per-alternate dict by offset
         success = self.__lookaheads[alternate][self.__state._offset]
     else:
         # size is unknown (None) when the expression both reads and
         # mutates groups
         size = None if (reads and mutates) else length(self.__state.groups)
         search = False
         if forwards:
             clone = State(self.__state._parser_state, self.__state._stream,
                           self.__state.groups.clone())
         else:
             # a positive lookbehind longer than the text so far can
             # never match
             if size is not None and size > self.__state._offset and equal:
                 raise Fail
             # restrict the stream to the text before the current offset
             (text, _) = s_next(self.__stream, self.__state._offset)
             stream = s_stream(self.__stream, text)
             if size is None or size > self.__state._offset:
                 search = True
                 pos = None
             else:
                 pos = self.__state._offset - size
             clone = State(self.__state._parser_state, stream,
                           self.__state.groups.clone(), pos=pos)
         (match, clone) = self.__run(alternate, clone, search=search)
         success = match == equal
         if not (reads or mutates):
             self.__lookaheads[alternate][self.__state._offset] = success
     # if lookahead succeeded, continue
     if success:
         # `clone` is always bound here when `mutates` is set, because
         # mutating lookaheads are never served from the cache
         if mutates:
             self.__state = self.__state.clone(groups=clone.groups)
         return next[0]
     else:
         raise Fail
Exemple #29
0
    def match(self, stream):
        '''
        Use the table to match a stream.

        This is a generator: every time a transition with a terminal is
        followed it yields (terminal, joined match so far, stream at that
        point).  Alternatives are explored depth first using an explicit
        stack, so the order of results follows the order in which entries
        are popped.
        
        The stack holds the current state, which is consumed from left to
        right.  An entry on the stack contains:
        
          - map_ - a map from character to [(dest state, terminals)]

          - matched - the [(dest state, terminals)] generated by the map for
            a given character

          - empties - empty transitions for this state

          - match - the current match, as a list of tokens consumed from the 
          stream

          - stream - the current stream
        '''
        #self._debug(str(self.__table))
        stack = deque()
        (map_, empties) = self.__table[0]
        stack.append((map_, None, empties, [], stream))
        while stack:
            #self._debug(str(stack))
            (map_, matched, empties, match, stream) = stack.pop()
            if not map_ and not matched and not empties:
                # if we have no more transitions, drop
                pass
            elif map_:
                # re-add empties with old match
                stack.append((None, None, empties, match, stream))
                # and try matching a character
                if not s_empty(stream):
                    (value, next_stream) = s_next(stream)
                    try:
                        matched = map_[value]
                        if matched:
                            stack.append((None, matched, None,
                                          match + [value], next_stream))
                    # NOTE(review): only IndexError is treated as "no
                    # transition"; if map_ is a plain dict a missing key
                    # raises KeyError instead - confirm the type of map_
                    except IndexError:
                        pass
            elif matched:
                # take the last remaining destination for this character
                (dest, terminal) = matched[-1]
                # add back reduced matched
                if len(matched) > 1: # avoid discard iteration
                    stack.append((map_, matched[:-1], empties, match, stream))
                # and expand this destination
                (map_, empties) = self.__table[dest]
                stack.append((map_, None, empties, match, stream))
                if terminal:
                    yield (terminal, self.__alphabet.join(match), stream)
            else:
                # we must have an empty transition
                (dest, terminal) = empties[-1]
                # add back reduced empties
                if len(empties) > 1: # avoid discard iteration
                    stack.append((map_, matched, empties[:-1], match, stream))
                # and expand this destination
                (map_, empties) = self.__table[dest]
                stack.append((map_, None, empties, match, stream))
                if terminal:
                    yield (terminal, self.__alphabet.join(match), stream)
Exemple #30
0
 def match(support, stream):
     '''
     Read one item and accept it only if it is in `chars` (from the
     enclosing scope); otherwise the result is None.
     '''
     (char, next_stream) = s_next(stream)
     return ([char], next_stream) if char in chars else None
Exemple #31
0
def any_char(support, stream):
    '''
    Generate every remaining item of the stream in turn, each as a
    one-element result paired with the stream that follows it.
    '''
    remaining = stream
    while True:
        (char, remaining) = s_next(remaining)
        yield ([char], remaining)
Exemple #32
0
 def match(support, stream):
     '''
     Walk along the stream generating each item that is in `chars` (from
     the enclosing scope), paired with the stream that follows it; items
     not in `chars` are skipped.
     '''
     remaining = stream
     while True:
         (char, remaining) = s_next(remaining)
         if char not in chars:
             continue
         yield ([char], remaining)
Exemple #33
0
 def capital(support, stream):
     '''
     Read one item and accept it only if it is in `ascii_uppercase`;
     otherwise the result is None.
     '''
     (char, next_stream) = s_next(stream)
     if char not in ascii_uppercase:
         return None
     return ([char], next_stream)
Exemple #34
0
 def _match(self, stream):
     '''
     Read one item from the stream and generate the integers below its
     first element, in increasing order, each paired with the same
     following stream.
     '''
     (value, next_stream) = s_next(stream)
     limit = value[0]
     for i in range(limit):
         yield ([i], next_stream)
Exemple #35
0
 def _match(self, stream):
     '''
     Read one item from the stream and generate the integers below its
     first element, in increasing order, each paired with the same
     following stream.
     '''
     (value, next_stream) = s_next(stream)
     limit = value[0]
     for i in range(limit):
         yield ([i], next_stream)
Exemple #36
0
def char(support, stream):
    '''
    Match a single item: the next item of the stream is returned as a
    one-element result, paired with the stream that follows it.
    '''
    (value, remainder) = s_next(stream)
    return ([value], remainder)