Example #1
def __init__(self, lexer):
    self._lexer = lexer
    # token stream produced by the lexer
    self.stream = TokenStream(self._lexer.get_stream())
    # parsed output (ParsedData instances)
    self.out_stream = []
    # open/close state of the tags being parsed
    self.ctx = {}
    # the most recently pushed ParsedData
    self.last = ParsedData('initial', 'initial')
    self._open_tags = []
Example #2
def filter(self, stream):
    """
    Apply the registered filters to the stream so that the
    token stream can still be modified after the tokenize step.
    """
    for filter_func in self.filters:
        stream = TokenStream(filter_func(stream))
    return stream
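
A filter here is just a callable that takes the token stream and returns an iterable of tokens; its result is wrapped in a fresh TokenStream. The snippet below is only a sketch of such a filter: the `name` and `data` fields of `MarkupToken` are taken from the other examples, while the filter itself and the way the `filters` list gets populated are assumptions.

# Hypothetical filter: drop "text" tokens that contain only whitespace.
def drop_empty_text(stream):
    for token in stream:
        if token.name == 'text' and not token.data.strip():
            continue
        yield token

# A lexer would register it in its `filters` list before filter() runs,
# e.g.  self.filters = [drop_empty_text]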
Example #3
    def tokenize(self):
        """
        Go through the text and tokenize it...
        This method goes through the text, and calls for
        every change of ``self.pos`` the whole ``self.scan_re``.
        Then it tries to match the text from ``self.pos`` to ``self.max``.
        If matched try to call a *match_handler*, to get a token stream.
        If no *match_handler* defined, add a standardized `MarkupToken` to the stack.
        If no regular expression matched on the text it handles it as
        text and produce a `MarkupToken` with the name "text".
        """
        while self.pos < self.max_length:
            for name, regex, handler in self.scan_re:
                m = self.match(regex)
                # if no match we try again with the next rule
                if not m:
                    continue

                self.stack.flush_text()
                if handler:
                    if hasattr(self, handler):
                        # try to handle the match with the `handler` method
                        stream = getattr(self, handler)(m)
                        if stream:
                            for token in stream:
                                if not isinstance(token, MarkupToken):
                                    raise TokenError(
                                        '%r is no instance of `MarkupToken`' %
                                        token)
                                self.stack.push(token)
                    else:
                        raise HandlerNotFound(
                            'can not find %r in %r' %
                            (handler, self.__class__.__name__))
                else:
                    # push the standardized token to the stack
                    self.stack.push(
                        MarkupToken(name, m.group(), m, **m.groupdict()))
                self.pos = self._end_pos
                break
            else:
                # no regex matched the text; send one char into the text buffer
                if self.pos < self.max_length:
                    self.stack.write_text(self.text[self.pos])
                else:
                    self.stack.flush_text()
                self.pos += 1
        self.stack.flush_text()
        self._parsed = True
        self.stream = self.filter(TokenStream(self.stack.flush()))
        return self.stream
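
To show how ``tokenize`` is meant to be driven, here is a rough sketch of a rule table and a *match_handler*. ``scan_re`` as ``(name, regex, handler_name)`` triples and the ``MarkupToken(name, data, match)`` call are taken from the code above; the base-class name ``MarkupLexer``, the concrete rules, and declaring ``scan_re`` on the class are assumptions.

import re

class WikiLexer(MarkupLexer):  # base-class name is an assumption
    # (token name, compiled regex, optional match_handler method name)
    scan_re = [
        ('bold', re.compile(r'\*\*(?P<text>.+?)\*\*'), 'handle_bold'),
        ('newline', re.compile(r'\n'), None),
    ]

    def handle_bold(self, m):
        # a match_handler yields MarkupToken objects for the stack
        yield MarkupToken('bold_start', '**', m)
        yield MarkupToken('text', m.group('text'), m)
        yield MarkupToken('bold_end', '**', m)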
Example #4
class TokenStreamParser(object):

    # tags that must be closed before generating the output:
    # {'name': '</closetag>'}
    tags_tc = {}

    # list of ('handler_name', 'token_name_to_handle') pairs
    handlers = []

    def __init__(self, lexer):
        self._lexer = lexer
        self.stream = TokenStream(self._lexer.get_stream())
        self.out_stream = []
        self.ctx = {}
        self.last = ParsedData('initial', 'initial')
        self._open_tags = []

    def parse(self):
        handler_names = []
        token_names = []
        if self.handlers:
            for handler_name, token_name in self.handlers:
                if not hasattr(self, handler_name):
                    raise HandlerNotFound('Can\'t find the handler %r in %r' % (
                        handler_name, self.__class__.__name__
                    ))
                handler_names.append(handler_name)
                token_names.append(token_name)
            # handlers were declared explicitly
            hitn = False
        else:
            # no handlers declared: handle every token by its own name
            hitn = True

        for token in self.stream:
            if not isinstance(token, MarkupToken):
                raise TokenError(
                    '%r is no instance of \'MarkupToken\'' % token
                )
            if token.name in token_names or hitn:
                if hitn:
                    handler = token.name
                else:
                    # look up the handler registered for this token name
                    handler = handler_names[token_names.index(token.name)]
                if hasattr(self, handler):
                    getattr(self, handler)(token)
                else:
                    raise HandlerNotFound('Can\'t find the handler %r in %r' % (
                        handler, self.__class__.__name__
                    ))
            else:
                self.push(ParsedData('text', escape_html(token.data)))

    def push(self, token):
        if token:
            if not isinstance(token, ParsedData):
                raise TokenError(
                    '%r is no instance of \'ParsedData\'' % token
                )
            self.out_stream.append(token)
            self.last = self.out_stream[-1]

    def pushmany(self, token_list):
        for token in token_list:
            self.push(token)

    def peek(self):
        return self.stream.look()

    def peekmany(self, num=1):
        tokens = self.stream.fetch_until(num)
        for tok in tokens:
            self.stream.feed(tok)
        return tokens

    # helper methods for handling opening and closing tags
    def open_state(self, name):
        if name not in self.ctx:
            self.ctx[name] = True
        else:
            #FIXME: should we raise an error?
            pass

    def close_state(self, name):
        if name in self.ctx:
            self.ctx[name] = False

    def rev_state(self, name):
        self.ctx[name] = not self.ctx.get(name, False)

    def check_open_state(self, name):
        return self.ctx.get(name, False)

    def get_output(self, text):
        raise NotImplementedError
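
A concrete parser would subclass TokenStreamParser, declare ``handlers`` (and any ``tags_tc``), implement the handler methods and provide ``get_output``. The sketch below is only an illustration: the token name 'bold', the HTML tags, and the assumption that ``ParsedData`` keeps its second argument as ``.data`` are not taken from the example.

class SimpleHTMLParser(TokenStreamParser):

    # hypothetical: close a still-open 'bold' before the output is built
    tags_tc = {'bold': '</strong>'}

    # dispatch tokens named 'bold' to handle_bold()
    handlers = [('handle_bold', 'bold')]

    def handle_bold(self, token):
        # toggle between the opening and the closing tag
        if self.check_open_state('bold'):
            self.push(ParsedData('bold', '</strong>'))
        else:
            self.push(ParsedData('bold', '<strong>'))
        self.rev_state('bold')

    def get_output(self, text):
        # the role of `text` is not shown in the example; here we simply
        # parse the stream and join the pushed fragments
        self.parse()
        return ''.join(p.data for p in self.out_stream)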