def get_tree(text):
    """Parse document text and return its filtered parse tree.

    CRLF and lone CR line endings are converted to LF before the text is
    handed to the lexer. Parsing then happens in two stages: a full parse
    tree is built from the token stream, and that tree is passed through
    ``filter_tree`` to produce the final result.
    """
    normalized = text.replace("\r\n", "\n").replace("\r", "\n")
    tokens = get_lexer(normalized, DocumentLexer)

    # Stage one: the complete parse tree, which still holds empty nodes
    # such as <p> </p> and <p> <br> </p>.
    full_tree = build_parse_tree(tokens)

    # Stage two: the same tree with the empty text elements cleared out.
    return filter_tree(full_tree)
def parse_comment(text):
    """Parse comment text and return the output produced by ``build_html``.

    CRLF and lone CR line endings are converted to LF, the text is lexed
    with ``CommentLexer``, parsed into a tree, filtered, and finally
    rendered by ``build_html`` into an in-memory buffer whose contents
    are returned.
    """
    # Imported at call time, inside the function, rather than at module level.
    from catonmat.parser.lexer import CommentLexer

    # TODO: this function is a 1:1 copy of pageparser.py:parsepage();
    # merge the two!
    source = text.replace("\r\n", "\n").replace("\r", "\n")
    tokens = get_lexer(source, CommentLexer)

    # The unfiltered tree still contains empty nodes such as <p> </p> and
    # <p> <br> </p>; filter_tree clears up those empty text elements.
    pruned_tree = filter_tree(build_parse_tree(tokens))

    out = StringIO()
    build_html(pruned_tree, out)
    return out.getvalue()