def read_tex(src): r"""Read next expression from buffer :param Buffer src: a buffer of tokens """ c = next(src) if c.startswith('$'): name = '$$' if c.startswith('$$') else '$' return TexEnv(name, [c[len(name):-len(name)]], nobegin=True) if c == '\\': if src.peek().startswith('item '): mode, expr = 'command', TexCmd(src.peek()[:4], (), TokenWithPosition.join(next(src).split(' ')[1:], glue=' ').strip()) elif src.peek() == 'begin': mode, expr = next(src), TexEnv(Arg.parse(src.forward(3)).value) else: mode, candidate, expr = 'command', next(src), None for i, c in enumerate(candidate): if c.isspace(): expr = TexCmd(candidate[:i], (), candidate[i+1:]) break if not expr: expr = TexCmd(candidate) while src.peek() in ARG_START_TOKENS: expr.args.append(read_tex(src)) if mode == 'begin': read_env(src, expr) if src.startswith('$'): expr.add_contents(read_tex(src)) return expr if c.startswith('\\'): return TexCmd(c[1:]) if c in ARG_START_TOKENS: return read_arg(src, c) return c
def read_tex(src): r"""Read next expression from buffer :param Buffer src: a buffer of tokens """ c = next(src) if c.startswith('$'): name = '$$' if c.startswith('$$') else '$' expr = TexEnv(name, [], nobegin=True) return read_math_env(src, expr) if c.startswith('\\'): command = TokenWithPosition(c[1:], src.position) if command == 'item': extra = src.forward_until(lambda string: any( [string.startswith(s) for s in {'\n', '\end', '\item'}])) mode, expr = 'command', TexCmd( command, (), TokenWithPosition.join(extra.split(' '), glue=' ').strip()) elif command == 'begin': mode, expr, _ = 'begin', TexEnv(src.peek(1)), src.forward(3) else: mode, expr = 'command', TexCmd(command) # TODO: allow only one line break # TODO: should really be handled by tokenizer candidate_index = src.num_forward_until(lambda s: not s.isspace()) src.forward(candidate_index) while src.peek() in ARG_START_TOKENS: expr.args.append(read_tex(src)) if not expr.args: src.backward(candidate_index) if mode == 'begin': read_env(src, expr) return expr if c in ARG_START_TOKENS: return read_arg(src, c) return c
def read_item(src): r"""Read the item content. There can be any number of whitespace characters between \item and the first non-whitespace character. However, after that first non-whitespace character, the item can only tolerate one successive line break at a time. \item can also take an argument. :param Buffer src: a buffer of tokens :return: contents of the item and any item arguments """ stringify = lambda s: TokenWithPosition.join(s.split(' '), glue=' ') def criterion(s): """Catch the first non-whitespace character""" return not any([s.startswith(substr) for substr in string.whitespace]) # Item argument such as in description environment arg = [] if src.peek() in ARG_START_TOKENS: c = next(src) arg.append(read_arg(src, c)) last = stringify(src.forward_until(criterion)) if last.startswith(' '): last = last[1:] extra = [last] while src.hasNext() and not src.startswith('\n\n') and \ not src.startswith('\item') and \ not src.startswith('\end') and \ not (hasattr(last, 'endswith') and last.endswith('\n\n') and len(extra) > 1): last = read_tex(src) extra.append(last) return extra, arg
def stringify(s):
    """Convert a token or string into a TokenWithPosition by re-joining its
    space-separated pieces."""
    return TokenWithPosition.join(s.split(' '), glue=' ')