def filter(self, stream):
    """
    Apply some filters on the stream so we can modify the stream
    after the tokenize process.
    """
    for filter_func in self.filters:
        stream = TokenStream(filter_func(stream))
    return stream
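# Illustrative sketch, not part of the original module: a stream filter is
# assumed here to be any callable that takes a token iterable and returns
# one, since ``filter`` wraps its result in a TokenStream again.  The name
# ``drop_empty_text`` and the registration shown at the end are hypothetical.
def drop_empty_text(stream):
    """Drop "text" tokens whose data is empty or whitespace only."""
    for token in stream:
        if token.name == 'text' and not token.data.strip():
            continue
        yield token

# a lexer could then register it with something along the lines of:
#     filters = [drop_empty_text]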
def tokenize(self):
    """
    Go through the text and tokenize it.

    For every change of ``self.pos`` this method runs the whole
    ``self.scan_re`` rule table and tries to match the text from
    ``self.pos`` to ``self.max_length``.  If a rule matches and defines a
    *match_handler*, that handler is called to produce a token stream; if
    no *match_handler* is defined, a standardized `MarkupToken` is pushed
    onto the stack.  Text that no regular expression matches is collected
    and emitted as a `MarkupToken` with the name "text".
    """
    while self.pos < self.max_length:
        for name, regex, handler in self.scan_re:
            m = self.match(regex)
            # if no match we try again with the next rule
            if not m:
                continue
            self.stack.flush_text()
            if handler:
                if hasattr(self, handler):
                    # try to handle the match with the `handler` method
                    stream = getattr(self, handler)(m)
                    if stream:
                        for token in stream:
                            if not isinstance(token, MarkupToken):
                                raise TokenError(
                                    '%r is no instance of `MarkupToken`' % token)
                            self.stack.push(token)
                else:
                    raise HandlerNotFound(
                        'can not find %r in %r' % (handler,
                                                   self.__class__.__name__))
            else:
                # push the standardized token to the stack
                self.stack.push(
                    MarkupToken(name, m.group(), m, **m.groupdict()))
            self.pos = self._end_pos
            break
        else:
            # no regex matched the text; send one char into the text buffer
            if self.pos < self.max_length:
                self.stack.write_text(self.text[self.pos])
            else:
                self.stack.flush_text()
            self.pos += 1
    self.stack.flush_text()
    self._parsed = True
    self.stream = self.filter(TokenStream(self.stack.flush()))
    return self.stream
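# Illustrative sketch, not part of the original module: a possible scan
# table and match handler for the ``tokenize`` loop above.  The rule names,
# the regular expressions and ``handle_link`` are hypothetical; only the
# ``(name, regex, handler)`` shape of ``scan_re`` and the rule that a
# handler must yield `MarkupToken` instances are taken from the code.
import re

class ExampleRules(object):
    scan_re = [
        # no handler: tokenize() pushes a standardized MarkupToken itself
        ('bold', re.compile(r'\*\*'), None),
        # with a handler name: tokenize() dispatches to that method
        ('link', re.compile(r'\[(?P<target>[^\]]+)\]'), 'handle_link'),
    ]

    def handle_link(self, m):
        # a match handler returns an iterable of MarkupToken instances
        yield MarkupToken('link', m.group(), m, target=m.group('target'))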
class TokenStreamParser(object):
    # tags that must be closed before the output is generated:
    # {'name': '</closetag>'}
    tags_tc = {}

    # ('handler_name', 'token_name_to_handle')
    handlers = []

    def __init__(self, lexer):
        self._lexer = lexer
        self.stream = TokenStream(self._lexer.get_stream())
        self.out_stream = []
        self.ctx = {}
        self.last = ParsedData('initial', 'initial')
        self._open_tags = []

    def parse(self):
        handler_names = []
        token_names = []
        if self.handlers:
            for handler_name, token_name in self.handlers:
                if not hasattr(self, handler_name):
                    raise HandlerNotFound("Can't find the handler %r in %r" % (
                        handler_name, self.__class__.__name__))
                handler_names.append(handler_name)
                token_names.append(token_name)
            hitn = False
        else:
            # no explicit handlers: handle every token by a method named
            # after the token itself
            hitn = True
        for token in self.stream:
            if not isinstance(token, MarkupToken):
                raise TokenError("%r is no instance of 'MarkupToken'" % token)
            if hitn or token.name in token_names:
                if hitn:
                    handler = token.name
                else:
                    # map the token name to its registered handler name
                    handler = handler_names[token_names.index(token.name)]
                if hasattr(self, handler):
                    getattr(self, handler)(token)
                else:
                    raise HandlerNotFound("Can't find the handler %r in %r" % (
                        handler, self.__class__.__name__))
            else:
                self.push(ParsedData('text', escape_html(token.data)))

    def push(self, token):
        if token:
            if not isinstance(token, ParsedData):
                raise TokenError("%r is no instance of 'ParsedData'" % token)
            self.out_stream.append(token)
            self.last = self.out_stream[-1]

    def pushmany(self, token_list):
        for token in token_list:
            self.push(token)

    def peek(self):
        return self.stream.look()

    def peekmany(self, num=1):
        tokens = self.stream.fetch_until(num)
        for tok in tokens:
            self.stream.feed(tok)
        return tokens

    # helpers for handling opening and closing tags

    def open_state(self, name):
        if name not in self.ctx:
            self.ctx[name] = True
        else:
            #FIXME: should we raise an error?
            pass

    def close_state(self, name):
        if name in self.ctx:
            self.ctx[name] = False

    def rev_state(self, name):
        self.ctx[name] = not self.ctx.get(name, False)

    def check_open_state(self, name):
        return self.ctx.get(name, False)

    def get_output(self, text):
        raise NotImplementedError
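# Illustrative sketch, not part of the original module: a minimal
# ``TokenStreamParser`` subclass.  The handler name, token name and the
# HTML it emits are hypothetical, as is the assumption that ``ParsedData``
# exposes its second argument as ``.data``; the ``handlers`` tuple format,
# ``push``/``rev_state``/``check_open_state`` and the abstract
# ``get_output`` come from the class above.
class ExampleHTMLParser(TokenStreamParser):
    # ('handler_name', 'token_name_to_handle')
    handlers = [
        ('handle_bold', 'bold'),
    ]

    def handle_bold(self, token):
        # toggle the bold state and emit the matching tag
        if self.check_open_state('bold'):
            self.push(ParsedData('bold_close', '</strong>'))
        else:
            self.push(ParsedData('bold_open', '<strong>'))
        self.rev_state('bold')

    def get_output(self, text):
        # ``text`` is ignored in this sketch; the lexer stream passed to
        # __init__ is parsed and joined, assuming ParsedData stores its
        # payload on ``.data``
        self.parse()
        return ''.join(parsed.data for parsed in self.out_stream)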