def _parse_text(self, builder, text):
    '''Parse a piece of rich text, handles all inline formatting

    The text is put in a one-item list which is passed through the
    C{sublist()} method of each inline pattern in turn. The resulting
    items are either plain strings or C{(tag, attrib, text)} tuples
    and are fed to the builder in order.

    @param builder: object with C{start(tag, attrib)}, C{data(text)}
    and C{end(tag)} methods that receives the parsed content
    @param text: the string to parse
    '''

    def parse_link(match):
        # Strip "|" to guard against malformed links like [[|foo]]
        # and [[foo|]]
        parts = match[1].strip('|').split('|', 2)
        link = parts[0]
        if len(parts) > 1:
            mytext = parts[1]
        else:
            mytext = link
        return ('link', {'href': link}, mytext)

    def parse_image(match):
        parts = match[1].split('|', 2)
        src = parts[0]
        if len(parts) > 1:
            mytext = parts[1]
        else:
            mytext = None  # image without text part
        attrib = self.parse_image_url(src)
        return ('img', attrib, mytext)

    def parse_tag(re_):
        groups = re_.m.groupdict()
        return ('tag', groups, "@%s" % groups["name"])

    def parse_style(style):
        # Factory binds 'style' per callback, so the result does not
        # depend on when sublist() happens to invoke the callback
        return lambda match: (style, {}, match[1])

    # NOTE(review): sublist() is defined elsewhere; from its use here it
    # is expected to return a new list in which matches are replaced by
    # the value returned by the callback -- confirm
    items = [text]
    items = parser_re['code'].sublist(
        lambda match: ('code', {}, match[1]), items)
    items = parser_re['link'].sublist(parse_link, items)
    items = parser_re['img'].sublist(parse_image, items)

    # Put URLs here because urls can appear in links or image tags,
    # but other markup can appear in links, like '//' or '__'
    items = url_re.sublist(
        lambda match: ('link', {'href': match[1]}, match[1]), items)

    # Order is significant: 'emphasis' is applied after the other styles
    for style in ('strong', 'mark', 'strike', 'sub', 'sup', 'emphasis'):
        items = parser_re[style].sublist(parse_style(style), items)

    items = parser_re['tag'].sublist(parse_tag, items)

    for item in items:
        if isinstance(item, tuple):
            tag, attrib, text = item
            builder.start(tag, attrib)
            if text:
                # text is None for images without a text part
                builder.data(text)
            builder.end(tag)
        else:
            builder.data(item)
def _parse_text(self, builder, text):
    '''Parse a piece of rich text, handles all inline formatting

    The text is put in a one-item list which is passed through the
    C{sublist()} method of each inline pattern in turn. The resulting
    items are either plain strings or C{(tag, attrib, text)} tuples
    and are fed to the builder in order.

    @param builder: object with C{start(tag, attrib)}, C{data(text)}
    and C{end(tag)} methods that receives the parsed content
    @param text: the string to parse
    '''

    def parse_link(match):
        parts = match[1].split('|', 2)
        link = parts[0]
        if len(parts) > 1:
            mytext = parts[1]
        else:
            mytext = link
        if not link:
            # [[|link]] bug: no target given, use the text as target
            link = mytext
        elif not mytext:
            # [[link|]] bug: no text given, show the target instead
            mytext = link
        return ('link', {'href': link}, mytext)

    def parse_image(match):
        parts = match[1].split('|', 2)
        src = parts[0]
        if len(parts) > 1:
            mytext = parts[1]
        else:
            mytext = None  # image without text part
        attrib = self.parse_image_url(src)
        return ('img', attrib, mytext)

    def parse_style(style):
        # Factory binds 'style' per callback, so the result does not
        # depend on when sublist() happens to invoke the callback
        return lambda match: (style, {}, match[1])

    # NOTE(review): sublist() is defined elsewhere; from its use here it
    # is expected to return a new list in which matches are replaced by
    # the value returned by the callback -- confirm
    items = [text]
    items = parser_re['code'].sublist(
        lambda match: ('code', {}, match[1]), items)
    items = parser_re['link'].sublist(parse_link, items)
    items = parser_re['img'].sublist(parse_image, items)

    for style in ('strong', 'mark', 'strike'):
        items = parser_re[style].sublist(parse_style(style), items)

    # URLs are handled before 'emphasis'; presumably the '//' in an URL
    # would otherwise be taken for emphasis markup -- confirm against
    # the definition of parser_re
    items = url_re.sublist(
        lambda match: ('link', {'href': match[1]}, match[1]), items)

    items = parser_re['emphasis'].sublist(parse_style('emphasis'), items)

    for item in items:
        if isinstance(item, tuple):
            tag, attrib, text = item
            builder.start(tag, attrib)
            if text is not None:
                # Images without a text part carry None as text, emit
                # an empty element rather than handing None to the builder
                builder.data(text)
            builder.end(tag)
        else:
            builder.data(item)
def parse(self, input):
    '''Parse text into a ParseTree in which only URLs are marked up

    @param input: either a string, which is split into lines with the
    line ends kept, or an object that is passed to C{url_re.sublist()}
    as is (presumably a list of strings -- confirm against callers)
    @returns: a C{ParseTree} wrapping a single 'zim-tree' element that
    holds the text, with a 'link' element for each URL match
    '''
    if isinstance(input, basestring):
        input = input.splitlines(True)

    def url_to_link(match):
        url = match[1]
        return ('link', {'href': url}, url)

    pieces = url_re.sublist(url_to_link, input)

    builder = TreeBuilder()
    builder.start('zim-tree')
    for piece in pieces:
        if not isinstance(piece, tuple):
            # plain text between the links
            builder.data(piece)
            continue
        tag, attrib, text = piece
        builder.start(tag, attrib)
        builder.data(text)
        builder.end(tag)
    builder.end('zim-tree')

    return ParseTree(builder.close())