def _init_block_parser(self):
    """Build the top-level parser that breaks text up into block-level items.

    Rules are registered for verbatim blocks, objects, headings, tables
    and horizontal lines, in that order. Text matched by none of them is
    handed to ``self.parse_para``.

    Returns the configured ``RuleParser``.
    """
    verbatim_rule = Rule(
        VERBATIM_BLOCK,
        r'''
        ^(?P<pre_indent>\t*) \'\'\' \s*?            # 3 "'"
        ( (?:^.*\n)*? )                             # multi-line text
        ^(?P=pre_indent) \'\'\' \s*? \n             # another 3 "'" with matching indent
        ''',
        process=self.parse_pre,
    )

    object_rule = Rule(
        OBJECT,
        r'''
        ^(?P<obj_indent>\t*) \{\{\{ \s*? (\S+:.*\n) # "{{{ object_type: attrib=..."
        ( (?:^.*\n)*? )                             # multi-line body
        ^(?P=obj_indent) \}\}\} \s*? \n             # "}}}" with matching indent
        ''',
        process=self.parse_object,
    )

    # "==== heading ===="
    heading_rule = Rule(
        HEADING,
        r'^( ==+ [\ \t]+ \S.*? ) [\ \t]* =* \n',
        process=self.parse_heading,
    )

    # Standard table format
    table_rule = Rule(
        TABLE,
        r'''
        ^(\|.*\|) \s*? \n                           # starting and ending with |
        ^( (?:\| [ \|\-:]+ \| \s*? \n)? )           # column align
        ( (?:^\|.*\| \s*? \n)+ )                    # multi-lines: starting and ending with |
        ''',
        process=self.parse_table,
    )

    # Line format: \n----\n
    line_rule = Rule(LINE, r'(?<=\n)-{5,}(?=\n)', process=self.parse_line)

    block_parser = RuleParser(
        verbatim_rule,
        object_rule,
        heading_rule,
        table_rule,
        line_rule,
    )
    # Anything the rules above do not claim is treated as a paragraph
    block_parser.process_unmatched = self.parse_para
    return block_parser
def _init_intermediate_parser(self):
    """Build the intermediate parser that breaks up lists and indented blocks.

    Three rules are registered, in order: bullet lists starting at the
    line start, bullet lists starting with a tab indent, and indented
    blocks without bullets. Unmatched text is handed to
    ``self.inline_parser``.

    Returns the configured ``RuleParser``.
    """
    # TODO: deprecate this by taking lists out of the para
    #       and make a new para for each indented block
    bullet_list_pattern = r'''(
        ^ %s .* \n                              # Line starting with bullet
        (?:
            ^ \t* %s .* \n                      # Line with same or more indent and bullet
        )*                                      # .. repeat
    )''' % (bullet_pattern, bullet_pattern)

    indented_list_pattern = r'''(
        ^(?P<list_indent>\t+) %s .* \n          # Line with indent and bullet
        (?:
            ^(?P=list_indent) \t* %s .* \n      # Line with same or more indent and bullet
        )*                                      # .. repeat
    )''' % (bullet_pattern, bullet_pattern)

    indented_block_pattern = r'''(
        ^(?P<block_indent>\t+) .* \n            # Line with indent
        (?:
            ^(?P=block_indent) (?!\t|%s) .* \n  # Line with _same_ indent, no bullet
        )*                                      # .. repeat
    )''' % bullet_pattern

    intermediate_parser = RuleParser(
        Rule('X-Bullet-List', bullet_list_pattern, process=self.parse_list),
        Rule('X-Indented-Bullet-List', indented_list_pattern, process=self.parse_list),
        Rule('X-Indented-Block', indented_block_pattern, process=self.parse_indent),
    )
    # Text that is neither a list nor an indented block goes to the inline parser
    intermediate_parser.process_unmatched = self.inline_parser
    return intermediate_parser
def build_text_parser(self):
    """Build the parser that captures template instructions in text.

    The rules capture ``[% .. %]`` and ``<!--[% ... %]-->`` including the
    "chomp" flags. The first two rules match a block-level instruction on
    its own line, restricted to the tokens listed in
    ``self._tokens_with_line_chomp``; the last two rules match
    instructions embedded in content. Every rule is processed by
    ``self._process_token``.

    Returns the combination of the four rules, joined with ``|``.
    """
    escaped_tokens = (re.escape(tok) for tok in self._tokens_with_line_chomp)
    line_tokens = '|'.join(escaped_tokens)

    # Instruction wrapped in an XML comment, alone on its line
    xml_line_rule = Rule(
        'X-XML-Token',
        r'''
        ^[^\S\n]*           # whitespace at line start
        \<\!--\[%%          # start of instruction
        (
            -?              # rchomp
            \s+
            (?:%s)          # line tokens
            (?:\s[^%%]*?)?  # optional expression -- the [^%%] os a bit of a hack here..
            \s
            -?              # lchomp
        )
        %%\]--\>            # end of instruction
        [^\S\n]*\n          # whitespace and end of line
        ''' % line_tokens,
        process=self._process_token,
    )

    # Plain instruction, alone on its line
    text_line_rule = Rule(
        'X-Text-Token',
        r'''
        ^[^\S\n]*           # whitespace at line start
        \[%%                # start of instruction
        (
            -?              # rchomp
            \s+
            (?:%s)          # line tokens
            (?:\s[^%%]*?)?  # optional expression -- the [^%%] os a bit of a hack here..
            \s
            -?              # lchomp
        )
        %%\]                # end of instruction
        [^\S\n]*\n          # whitespace and end of line
        ''' % line_tokens,
        process=self._process_token,
    )

    # Instructions embedded within a line of content
    xml_inline_rule = Rule(
        'X-Inline-XML-Token',
        r'\<\!--\[%(-?\s.*?\s-?)%\]--\>',
        process=self._process_token,
    )
    text_inline_rule = Rule(
        'X-Inline-Text-Token',
        r'\[%(-?\s.*?\s-?)%\]',
        process=self._process_token,
    )

    return xml_line_rule | text_line_rule | xml_inline_rule | text_inline_rule
def parse(self, input, partial=False):
    """Parse text into a parse tree in which only URLs are recognized.

    @param input: the text, either a single string or an iterable of
    strings that is joined into one
    @param partial: when C{False} the text is first passed through
    C{fix_line_end()}; the flag is also forwarded to the
    C{ParseTreeBuilder}
    @returns: the result of C{get_parsetree()} on the builder
    """
    text = input if isinstance(input, str) else ''.join(input)
    if not partial:
        text = fix_line_end(text)

    # FIXME need .r attribute because url_re is a Re object
    link_rule = Rule(LINK, url_re.r, process=self.parse_url)
    parser = zim.parser.Parser(link_rule)

    builder = ParseTreeBuilder(partial=partial)
    builder.start(FORMATTEDTEXT)
    parser(builder, text)
    builder.end(FORMATTEDTEXT)
    return builder.get_parsetree()
def _init_inline_parse(self):
    """Build the parser for inline formatting, links and tags.

    As a side effect this sets ``self.nested_inline_parser`` to the
    combination of the tag, emphasis, strong, mark, subscript,
    superscript, strike and verbatim rules. The returned parser
    additionally recognizes URLs, ``[[...]]`` links and ``{{...}}``
    images ahead of those nested rules.

    The URL rule uses ``old_url_re`` when ``self.backward_url_parsing``
    is set and ``url_re`` otherwise.
    """
    if self.backward_url_parsing:
        my_url_re = old_url_re
    else:
        my_url_re = url_re

    def descent(*args):
        # Look up self.inline_parser at call time rather than binding it now
        return self.inline_parser(*args)

    # Formatting rules whose content is parsed again through ``descent``
    descending_styles = (
        (EMPHASIS, r'//(?!/)(.*?)(?<!:)//'),  # no ':' at the end (ex: 'http://')
        (STRONG, r'\*\*(?!\*)(.*?)\*\*'),
        (MARK, r'__(?!_)(.*?)__'),
        (SUBSCRIPT, r'_\{(?!~)(.+?)\}'),
        (SUPERSCRIPT, r'\^\{(?!~)(.+?)\}'),
        (STRIKE, r'~~(?!~)(.+?)~~'),
    )

    nested = Rule(TAG, r'(?<!\S)@\w+', process=self.parse_tag)
    for style_tag, style_pattern in descending_styles:
        nested = nested | Rule(style_tag, style_pattern, descent=descent)
    # The verbatim rule gets neither a process nor a descent callback
    nested = nested | Rule(VERBATIM, r"''(?!')(.+?)''")
    self.nested_inline_parser = nested

    url_rule = Rule(LINK, my_url_re, process=self.parse_url)
    link_rule = Rule(LINK, r'\[\[(?!\[)(.*?\]*)\]\]', process=self.parse_link)
    image_rule = Rule(IMAGE, r'\{\{(?!\{)(.*?)\}\}', process=self.parse_image)

    return url_rule | link_rule | image_rule | self.nested_inline_parser