Example #1
def find_tokens(matcher):
    '''
    Returns a set of Tokens.  Also asserts that children of tokens are
    not themselves Tokens. 
    
    Should we also check that a Token occurs somewhere on every path to a
    leaf node?
    '''
    (tokens, visited, non_tokens) = (set(), set(), set())
    stack = deque([matcher])
    while stack:
        matcher = stack.popleft()
        if matcher not in visited:
            if is_child(matcher, NonToken):
                non_tokens.add(matcher)
            if isinstance(matcher, BaseToken):
                tokens.add(matcher)
                if matcher.content:
                    assert_not_token(matcher.content, visited)
            else:
                for child in matcher:
                    if isinstance(child, Matcher):
                        stack.append(child)
            visited.add(matcher)
    if tokens and non_tokens:
        raise LexerError(
            fmt('The grammar contains a mix of Tokens and non-Token '
                'matchers at the top level.  If Tokens are used then '
                'non-token matchers that consume input must only '
                'appear "inside" Tokens.  The non-Token matchers '
                'include: {0}.',
                '; '.join(str(n) for n in non_tokens)))
    return tokens
Example #2
def assert_not_token(node, visited):
    '''
    Assert that neither this nor any child node is a Token. 
    '''
    if isinstance(node, Matcher) and node not in visited:
        visited.add(node)
        if isinstance(node, BaseToken):
            raise LexerError(fmt('Nested token: {0}', node))
        else:
            for child in node:
                assert_not_token(child, visited)
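Taken together, these two functions walk the matcher graph breadth-first with a shared visited set, collecting Tokens and checking that nothing below a Token is itself a Token. The following self-contained sketch reproduces that pattern; Node and FakeToken are illustrative stand-ins, not LEPL's Matcher or BaseToken classes.

from collections import deque

class Node:
    '''Hypothetical stand-in for a Matcher: iterating yields children.'''
    def __init__(self, *children):
        self.children = list(children)
    def __iter__(self):
        return iter(self.children)

class FakeToken(Node):
    '''Hypothetical stand-in for BaseToken.'''
    pass

def collect_tokens(root):
    '''Breadth-first walk that gathers FakeToken nodes, as find_tokens does.'''
    (tokens, visited) = (set(), set())
    stack = deque([root])
    while stack:
        node = stack.popleft()
        if node in visited:
            continue
        visited.add(node)
        if isinstance(node, FakeToken):
            tokens.add(node)
            assert_no_nested_token(node, visited)
        else:
            stack.extend(node)
    return tokens

def assert_no_nested_token(node, visited):
    '''Recursive check below a token, as assert_not_token does.'''
    for child in node:
        if child not in visited:
            visited.add(child)
            if isinstance(child, FakeToken):
                raise ValueError('Nested token: {0}'.format(child))
            assert_no_nested_token(child, visited)

grammar = Node(FakeToken(Node()), FakeToken(Node()))
print(len(collect_tokens(grammar)))        # prints 2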
Example #3
def _match(self, stream):
    '''
    On matching we first assert that the token type is correct and then
    delegate to the content.
    '''
    if not self.compiled:
        raise LexerError(
            fmt('A {0} token has not been compiled. '
                'You must use the lexer rewriter with Tokens. '
                'This can be done by using matcher.config.lexer().',
                self.__class__.__name__))
    ((tokens, _), next_stream) = s_next(stream)
    if self.id_ in tokens:
        yield ([], next_stream)
Example #4
def __to_regexp(regexp, alphabet):
    '''
    The regexp may be a matcher; if so we try to convert it to a regular
    expression and extract the equivalent text.
    '''
    if isinstance(regexp, Matcher):
        rewriter = CompileRegexp(alphabet)
        rewrite = rewriter(regexp)
        if isinstance(rewrite, BaseRegexp):
            regexp = str(rewrite.regexp)
        else:
            raise LexerError(
                format('A Token was specified with a matcher, '
                       'but the matcher could not be converted to '
                       'a regular expression: {0}', rewrite))
    return regexp
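In other words, a Token can be defined either with a regexp string or with a matcher that __to_regexp reduces to an equivalent regexp via CompileRegexp. A hedged sketch of both forms; Any and the repeat-and-join slice syntax are assumed here to be the ordinary LEPL matcher API.

# regexp given directly as text
number = Token('[0-9]+')
# matcher form: reduced to an equivalent regexp by CompileRegexp, or
# LexerError is raised if it cannot be reduced
number = Token(Any('0123456789')[1:, ...])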
Example #5
def _match(self, stream):
    '''
    On matching we first assert that the token type is correct and then
    delegate to the content.
    '''
    if not self.compiled:
        raise LexerError(
            fmt('A {0} token has not been compiled. '
                'You must use the lexer rewriter with Tokens. '
                'This can be done by using matcher.config.lexer().',
                self.__class__.__name__))
    ((tokens, line_stream), next_stream) = s_next(stream)
    if self.id_ in tokens:
        if self.content is None:
            # result contains all data (use s_next not s_line to set max)
            (line, _) = s_line(line_stream, True)
            (line, _) = s_next(line_stream, count=len(line))
            yield ([line], next_stream)
        else:
            generator = self.content._match(line_stream)
            while True:
                (result, next_line_stream) = yield generator
                if s_empty(next_line_stream) or not self.complete:
                    yield (result, next_stream)
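The line `(result, next_line_stream) = yield generator` is LEPL's co-operative (trampolined) generator style: the matcher yields the sub-matcher's generator to a scheduler, which evaluates it and sends the result back in. A minimal sketch of that control flow, with a toy driver standing in for LEPL's trampoline; none of these names are LEPL's.

from types import GeneratorType

def child(stream):
    # hypothetical sub-matcher: consume one character
    yield (stream[0], stream[1:])

def parent(stream):
    generator = child(stream)
    # hand the sub-generator to the driver; receive its result back
    (result, tail) = yield generator
    yield (result.upper(), tail)

def drive(generator, to_send=None):
    # toy trampoline: run nested generators and send their results back up
    value = generator.send(to_send) if to_send is not None else next(generator)
    while isinstance(value, GeneratorType):
        value = generator.send(drive(value))
    return value

print(drive(parent('abc')))   # prints ('A', 'bc')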
Example #6
def _match(self, stream):
    '''
    On matching we first assert that the token type is correct and then
    delegate to the content.
    '''
    if not self.compiled:
        raise LexerError(
            format('A {0} token has not been compiled. '
                   'You must use the lexer rewriter with Tokens. '
                   'This can be done by using matcher.config.lexer().',
                   self.__class__.__name__))
    if stream:
        (tokens, contents) = stream[0]
        if self.id_ in tokens:
            if self.content is None:
                # result contains all data
                yield ([contents], stream[1:])
            else:
                new_stream = self.__new_stream(contents, stream)
                generator = self.content._match(new_stream)
                while True:
                    (result, stream_out) = yield generator
                    if not stream_out or not self.complete:
                        yield (result, stream[1:])
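Unlike the previous example, this older variant indexes the stream directly as a sequence of (token ids, contents) pairs rather than going through s_next. A hedged illustration of the shape it expects; the ids and text below are made up.

stream = [
    (frozenset([0]), 'hello'),   # token ids that matched, and the matched text
    (frozenset([1]), '42'),
]
(tokens, contents) = stream[0]   # first token: its ids and contents
rest = stream[1:]                # remainder of the token stream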