def parse(s, short_title):
    """Parse wiki text into a nested dictionary of nodes.

    The text must begin with an H1 heading, which supplies the node title.
    If the remaining text contains no further heading, the node is a plain
    text node. Otherwise the text is split at the shallowest heading level
    found (tried in order from 2 to 6) and every section is parsed
    recursively as a child node.

    Parameters:
        s: the wiki text, starting with an H1 heading.
        short_title: stored unchanged as the short title of the returned
            node.

    Returns:
        A dict with the keys 'title' (at most 150 characters, anything from
        the first "§" onwards removed), 'short_title', 'text' and
        'children' (a list of dicts of the same shape; empty for a plain
        text node).

    Raises:
        InvalidWikiStructure: if the text does not start with an H1
            heading, if a heading is detected but none of level 2 to 6 can
            be used to split the text, or if the text preceding the first
            sub-heading still contains a heading.
    """
    # make sure we start with a heading 1
    h1_match = h1_start.match(s)
    if not h1_match:
        raise InvalidWikiStructure('Must start with H1 heading to set title')

    # silently remove attempt to set short_title in H1
    title = h1_match.groups("title")[0].partition("§")[0].strip()
    # NOTE(review): sub() replaces every match of h1_start, not only the
    # leading one -- confirm against the pattern that this is intended.
    body = h1_start.sub("", s)

    node = {
        # Limit the title to 150 characters; strip once more because the
        # cut may leave trailing whitespace behind.
        'title': title[:150].strip(),
        'short_title': short_title,
        'children': []
    }

    # do we need a StructureNode or will a TextNode do?
    if not general_heading.search(body):
        # TextNode
        node['text'] = body.strip()
        return node

    # StructureNode: determine the used header depth
    for level in range(2, 7):
        matcher = get_heading_matcher(level)
        if matcher.search(body):
            break
    else:
        # Raised explicitly instead of asserted: an assert would vanish
        # under "python -O" and is the wrong error type for bad input.
        raise InvalidWikiStructure(
            'Could not determine heading level (expected H2 to H6)')

    split_doc = matcher.split(body)
    # now the text before, between and after headings is split_doc[0::3]
    # the text of the headings are split_doc[1::3]
    # and the short titles (or None if omitted) are split_doc[2::3]

    # leading text is used to set text of structure node
    node['text'] = split_doc[0].strip()
    # make sure that no headings are left in that text
    if general_heading.search(node['text']):
        raise InvalidWikiStructure("Cannot have headers in Node text")

    # iterate the headings, short titles, and corresponding texts
    used_short_titles = set()
    sections = zip(split_doc[1::3], split_doc[2::3], split_doc[3::3])
    for heading, child_short_title, text in sections:
        # fall back to the heading if the short title is missing or blank
        if not child_short_title or not child_short_title.strip():
            child_short_title = heading
        child_short_title = turn_into_valid_short_title(child_short_title,
                                                        used_short_titles)
        used_short_titles.add(child_short_title)

        # re-wrap the section as an H1 document so it can be parsed the
        # same way as the top level
        node['children'].append(
            parse("= %s =\n" % heading.strip() + text.strip(),
                  child_short_title))

    return node
def get_title_from_text(text):
    """Return the title taken from the H1 heading at the start of *text*.

    Anything from the first "§" onwards is removed from the title and the
    result is stripped of surrounding whitespace.

    Raises:
        InvalidWikiStructure: if *text* does not start with an H1 heading.
    """
    # make sure we start with a heading 1
    heading = h1_start.match(text)
    if not heading:
        raise InvalidWikiStructure('Must start with H1 heading to set title')

    # silently remove attempt to set short_title in H1
    raw_title, _separator, _short_title = \
        heading.groups("title")[0].partition("§")
    return raw_title.strip()
def split_title_from_text(text):
    """Split *text* into its H1 title and the remaining text.

    Returns:
        A tuple ``(title, text)``: the title with anything from the first
        "§" onwards removed, and the text with all matches of the H1
        pattern removed. Both are stripped of surrounding whitespace.

    Raises:
        InvalidWikiStructure: if *text* does not start with an H1 heading.
    """
    heading = h1_start.match(text)
    if not heading:
        raise InvalidWikiStructure('Must start with H1 heading to set title')

    # silently remove attempt to set short_title in H1
    title_part = heading.groups("title")[0].partition("§")[0]
    remainder = re.sub(h1_start, "", text)
    return title_part.strip(), remainder.strip()