def tokenize_spacers(text, prev=None):
    r"""Merge spacers, an optional line break and trailing spacers.

    Consumes, in order: any run of spacer characters, at most one
    end-of-line character, and any further run of spacer characters.

    :param Buffer text: iterator over line, with current position
    :param prev: not used by this tokenizer
    :return: the merged token, tagged ``TC.MergedSpacer``; ``None`` when
        nothing was consumed, or when the consumed run is directly followed
        by a ``CC.Letter`` / ``CC.Other`` character (the buffer is then
        moved back by the distance advanced past the token's position)

    >>> tokenize_spacers(categorize('\t\n{there'))
    '\t\n'
    >>> tokenize_spacers(categorize('\t\nthere'))
    >>> tokenize_spacers(categorize(' \t '))
    ' \t '
    >>> tokenize_spacers(categorize(r' ccc'))
    """

    def upcoming(category):
        # True only if a character remains and it has the given category.
        return text.hasNext() and text.peek().category == category

    merged = Token('', text.position)

    # Leading run of spacers.
    while upcoming(CC.Spacer):
        merged += text.forward(1)
    # At most one line break.
    if upcoming(CC.EndOfLine):
        merged += text.forward(1)
    # Spacers that follow the line break (or more spacers if there was none).
    while upcoming(CC.Spacer):
        merged += text.forward(1)
    merged.category = TC.MergedSpacer

    # Whitespace directly in front of ordinary text is not emitted as a
    # separate token: undo the consumption and report no match.
    touches_text = (
        text.hasNext() and text.peek().category in (CC.Letter, CC.Other)
    )
    if touches_text:
        text.backward(text.position - merged.position)
        return None

    return merged if merged else None
def tokenize_line_comment(text, prev=None):
    r"""Consume a line comment, up to but excluding the line break.

    :param Buffer text: iterator over line, with current position
    :param prev: optional object with a ``category`` attribute (presumably
        the previously produced token); no comment is started when its
        category equals ``CC.Comment``
    :return: the comment token, tagged ``TC.Comment``, or ``None`` when the
        next character does not open a comment

    >>> tokenize_line_comment(categorize('%hello world\\'))
    '%hello world\\'
    >>> tokenize_line_comment(categorize('hello %world'))
    >>> tokenize_line_comment(categorize('%}hello world'))
    '%}hello world'
    >>> tokenize_line_comment(categorize('%} '))
    '%} '
    >>> tokenize_line_comment(categorize('%hello\n world'))
    '%hello'
    >>> b = categorize(r'\\%')
    >>> _ = next(b), next(b)
    >>> tokenize_line_comment(b)
    '%'
    >>> tokenize_line_comment(categorize(r'\%'))
    """
    if text.peek().category != CC.Comment:
        return None
    # NOTE(review): this compares against CC.Comment, whereas the token
    # produced below is tagged TC.Comment -- confirm which enum
    # ``prev.category`` is expected to carry.
    if prev is not None and prev.category == CC.Comment:
        return None

    comment = Token('', text.position)
    comment += text.forward(1)  # the comment character itself
    # Everything up to the end of the line belongs to the comment; the line
    # break is left in the buffer.
    while text.hasNext() and text.peek().category != CC.EndOfLine:
        comment += text.forward(1)
    comment.category = TC.Comment
    return comment
def tokenize_math_sym_switch(text, prev=None):
    r"""Turn one or two consecutive math-switch characters into a token.

    :param Buffer text: iterator over line, with current position
    :param prev: not used by this tokenizer
    :return: a token tagged ``TC.DisplayMathSwitch`` when two math-switch
        characters follow each other, a token tagged ``TC.MathSwitch`` for
        a single one, or ``None`` when the next character is not a math
        switch

    >>> tokenize_math_sym_switch(categorize(r'$\min_x$ \command'))
    '$'
    >>> tokenize_math_sym_switch(categorize(r'$$\min_x$$ \command'))
    '$$'
    """
    if text.peek().category != CC.MathSwitch:
        return None

    following = text.peek(1)
    if following and following.category == CC.MathSwitch:
        width, kind = 2, TC.DisplayMathSwitch
    else:
        width, kind = 1, TC.MathSwitch

    # NOTE(review): arguments are evaluated left to right, so
    # ``text.position`` is read after the buffer has already advanced --
    # confirm this is the intended position for the token.
    switch = Token(text.forward(width), text.position)
    switch.category = kind
    return switch