def build_section_parser():
    """
    Build a parser for the section structure of a markdown document.

    The construction is mutually recursive. ``markdown_depth(n)`` accepts
    either a section whose header tag is longer than ``n`` characters or a
    plain ``line``; ``section(n)`` in turn gathers ``markdown_depth(n)``
    items as its subsections. The parser returned here is ``lib.many``
    applied to ``markdown_depth(0)``.

    On reaching a header we have to decide whether it belongs inside the
    section currently being accumulated or whether accumulation should stop.
    That decision is taken by looking ahead at the header tag (``lib.peek``)
    and then picking the follow-up parser from the tag's length, which is
    what ``parse_section_if_depth_sat`` packages up.
    """

    def _glue(pieces):
        return "".join(pieces)

    def _glue_all(*pieces):
        return _glue(pieces)

    concat_results = lib.lift(_glue_all)

    # A header tag is one '#' followed by whatever
    # take_until(char(' '), char('#')) collects, joined into a single string.
    # NOTE(review): presumably this gathers '#' characters up to the first
    # space -- confirm against lib.take_until.
    leading_hash = lib.char('#')
    trailing_hashes = lib.take_until(lib.char(' '), lib.char('#')).map(_glue)
    header_tag = concat_results(leading_hash, trailing_hashes)

    def parse_section_if_depth_sat(pred):
        """
        Parse a section only when the upcoming header's depth satisfies pred.

        The header tag is peeked; its length is taken as the depth. When
        ``pred(depth)`` holds the result is ``section(depth)``, otherwise
        ``lib.fail()``.
        """
        def choose_parser(tag):
            depth = len(tag)
            if pred(depth):
                return section(depth)
            return lib.fail()

        return lib.peek(header_tag).bind(choose_parser)

    def markdown_depth(n):
        """
        Parse one item at nesting level n: either a section whose header
        depth is strictly greater than n, or a ``line``.
        """
        def is_deeper(m):
            return m > n

        deeper_section = parse_section_if_depth_sat(is_deeper)
        return lib.alternative(deeper_section, line)

    def section(n):
        """
        Capture the contents of a section of depth n.

        This assumes n is already the correct depth of the next header.
        The title comes from ``lib.right(header_tag, line)`` with surrounding
        whitespace stripped. Subsections are ``markdown_depth(n)`` items,
        collected with ``lib.take_until`` whose first argument matches a
        header of depth at most n.
        """
        def _stripped(text):
            return text.strip()

        def closes_section(m):
            return m <= n

        strip_result = lib.lift(_stripped)
        title = strip_result(lib.right(header_tag, line))

        stop_at = parse_section_if_depth_sat(closes_section)
        children = lib.take_until(stop_at, markdown_depth(n))

        @lib.parser(title, children)
        def _section(heading, body):
            return Section(n, heading, body)

        return _section

    return lib.many(markdown_depth(0))
def builder():
    """Return ``lib.many`` applied to ``lib.anychar()``."""
    any_character = lib.anychar()
    return lib.many(any_character)
def test_many_on_empty_string():
    """``lib.many`` over ``char('a')`` yields an empty list on empty input."""
    many_a = lib.many(lib.char('a'))
    result = many_a("")
    assert result == []
def test_many_until_end_of_string():
    """``lib.many`` consumes every 'a' when the input is nothing but 'a's."""
    many_a = lib.many(lib.char('a'))
    as_string = many_a.map(lambda chars: "".join(chars))
    result = as_string("aaa")
    assert result == "aaa"
def test_many():
    """
    In ``partial()`` form, ``lib.many`` over ``char('a')`` returns the joined
    'a's together with the remaining input.
    """
    many_a = lib.many(lib.char('a'))
    as_string = many_a.map(lambda chars: "".join(chars))
    partial_parser = as_string.partial()
    expected = ("aaa", "bbb")
    assert partial_parser("aaabbb") == expected