Ejemplo n.º 1
0
    def _init_block_parser(self):
        # Top level parser, to break up block level items
        p = RuleParser(
            Rule(VERBATIM_BLOCK,
                 r'''
				^(?P<pre_indent>\t*) \'\'\' \s*?				# 3 "'"
				( (?:^.*\n)*? )									# multi-line text
				^(?P=pre_indent) \'\'\' \s*? \n					# another 3 "'" with matching indent
				''',
                 process=self.parse_pre),
            Rule(OBJECT,
                 r'''
				^(?P<obj_indent>\t*) \{\{\{ \s*? (\S+:.*\n)		# "{{{ object_type: attrib=..."
				( (?:^.*\n)*? ) 								# multi-line body
				^(?P=obj_indent) \}\}\} \s*? \n					# "}}}" with matching indent
				''',
                 process=self.parse_object),
            Rule(
                HEADING,
                r'^( ==+ [\ \t]+ \S.*? ) [\ \t]* =* \n',  # "==== heading ===="
                process=self.parse_heading),
            # standard table format
            Rule(TABLE,
                 r'''
				^(\|.*\|) \s*? \n								# starting and ending with |
				^( (?:\| [ \|\-:]+ \| \s*? \n)? )				# column align
				( (?:^\|.*\| \s*? \n)+ )							# multi-lines: starting and ending with |
				''',
                 process=self.parse_table),
            # line format
            Rule(LINE, r'(?<=\n)-{5,}(?=\n)',
                 process=self.parse_line)  # \n----\n
        )
        p.process_unmatched = self.parse_para
        return p
Ejemplo n.º 2
0
    def _init_intermediate_parser(self):
        # Intermediate level, breaks up lists and indented blocks
        # TODO: deprecate this by taking lists out of the para
        #       and make a new para for each indented block
        p = RuleParser(
            Rule('X-Bullet-List',
                 r'''(
					^ %s .* \n								# Line starting with bullet
					(?:
						^ \t* %s .* \n						# Line with same or more indent and bullet
					)*										# .. repeat
				)''' % (bullet_pattern, bullet_pattern),
                 process=self.parse_list),
            Rule('X-Indented-Bullet-List',
                 r'''(
					^(?P<list_indent>\t+) %s .* \n			# Line with indent and bullet
					(?:
						^(?P=list_indent) \t* %s .* \n		# Line with same or more indent and bullet
					)*										# .. repeat
				)''' % (bullet_pattern, bullet_pattern),
                 process=self.parse_list),
            Rule('X-Indented-Block',
                 r'''(
					^(?P<block_indent>\t+) .* \n			# Line with indent
					(?:
						^(?P=block_indent) (?!\t|%s) .* \n	# Line with _same_ indent, no bullet
					)*										# .. repeat
				)''' % bullet_pattern,
                 process=self.parse_indent),
        )
        p.process_unmatched = self.inline_parser
        return p
Ejemplo n.º 3
0
	def build_text_parser(self):
		"""Build the parser that finds template instructions in text.

		The rules capture "[% .. %]" and "<!--[% ... %]-->" instructions,
		including their "chomp" flags. The first two rules match a block
		level instruction on its own line and only accept the tokens listed
		in self._tokens_with_line_chomp; the next two rules match
		instructions embedded in content. Every match is handled by
		self._process_token.

		Returns the rules combined with the "|" operator, block level rules
		first.
		"""
		# Alternation of the regex-escaped tokens, substituted for "%s" in
		# the block level patterns (which therefore spell a literal "%" as "%%").
		token_alternation = '|'.join(
			re.escape(token) for token in self._tokens_with_line_chomp
		)

		xml_line_rule = Rule('X-XML-Token', r'''
				^[^\S\n]*			# whitespace at line start
				\<\!--\[%%			# start of instruction
				(
					-?				# rchomp
					\s+
					(?:%s)			# line tokens
					(?:\s[^%%]*?)?	# optional expression -- the [^%%] os a bit of a hack here..
					\s
					-?				# lchomp
				)
				%%\]--\>			# end of instruction
				[^\S\n]*\n			# whitespace and end of line
				''' % token_alternation,
			process=self._process_token)

		text_line_rule = Rule('X-Text-Token', r'''
				^[^\S\n]*			# whitespace at line start
				\[%%				# start of instruction
				(
					-?				# rchomp
					\s+
					(?:%s)			# line tokens
					(?:\s[^%%]*?)?	# optional expression -- the [^%%] os a bit of a hack here..
					\s
					-?				# lchomp
				)
				%%\]				# end of instruction
				[^\S\n]*\n			# whitespace and end of line
				''' % token_alternation,
			process=self._process_token)

		# The inline patterns are not %-formatted, so "%" is written once.
		xml_inline_rule = Rule(
			'X-Inline-XML-Token',
			r'\<\!--\[%(-?\s.*?\s-?)%\]--\>',
			process=self._process_token)

		text_inline_rule = Rule(
			'X-Inline-Text-Token',
			r'\[%(-?\s.*?\s-?)%\]',
			process=self._process_token)

		return xml_line_rule | text_line_rule | xml_inline_rule | text_inline_rule
Ejemplo n.º 4
0
	def parse(self, input, partial=False):
		"""Parse text into a parse tree in which URLs are turned into links.

		@param input: the text, either a string or an iterable of strings
		that is joined into a single string
		@param partial: when False the text is first passed through
		fix_line_end(); the flag is also forwarded to the ParseTreeBuilder
		@returns: the result of the builder's get_parsetree()
		"""
		text = input if isinstance(input, str) else ''.join(input)
		if not partial:
			text = fix_line_end(text)

		# FIXME need .r attribute because url_re is a Re object
		link_rule = Rule(LINK, url_re.r, process=self.parse_url)
		url_parser = zim.parser.Parser(link_rule)

		# Wrap everything the parser emits in a single FORMATTEDTEXT node
		tree_builder = ParseTreeBuilder(partial=partial)
		tree_builder.start(FORMATTEDTEXT)
		url_parser(tree_builder, text)
		tree_builder.end(FORMATTEDTEXT)
		return tree_builder.get_parsetree()
Ejemplo n.º 5
0
    def _init_inline_parse(self):
        """Build the rules for inline formatting, links and tags.

        The rules for tags and nestable formatting (emphasis, strong, mark,
        subscript, superscript, strike, verbatim) are combined and stored in
        self.nested_inline_parser. The return value puts the URL, wiki link
        and image rules in front of that nested parser.

        self.backward_url_parsing selects old_url_re instead of url_re for
        the URL rule.
        """
        if self.backward_url_parsing:
            link_url_re = old_url_re
        else:
            link_url_re = url_re

        def descent(*args):
            # Look up self.inline_parser at call time rather than now, so the
            # rules do not capture whatever value it has while being built.
            return self.inline_parser(*args)

        tag_rule = Rule(TAG, r'(?<!\S)@\w+', process=self.parse_tag)
        # no ':' at the end (ex: 'http://')
        emphasis_rule = Rule(
            EMPHASIS, r'//(?!/)(.*?)(?<!:)//', descent=descent)
        strong_rule = Rule(STRONG, r'\*\*(?!\*)(.*?)\*\*', descent=descent)
        mark_rule = Rule(MARK, r'__(?!_)(.*?)__', descent=descent)
        subscript_rule = Rule(SUBSCRIPT, r'_\{(?!~)(.+?)\}', descent=descent)
        superscript_rule = Rule(
            SUPERSCRIPT, r'\^\{(?!~)(.+?)\}', descent=descent)
        strike_rule = Rule(STRIKE, r'~~(?!~)(.+?)~~', descent=descent)
        # Verbatim text gets neither a process nor a descent callback
        verbatim_rule = Rule(VERBATIM, r"''(?!')(.+?)''")

        self.nested_inline_parser = (
            tag_rule
            | emphasis_rule
            | strong_rule
            | mark_rule
            | subscript_rule
            | superscript_rule
            | strike_rule
            | verbatim_rule
        )

        url_rule = Rule(LINK, link_url_re, process=self.parse_url)
        wiki_link_rule = Rule(
            LINK, r'\[\[(?!\[)(.*?\]*)\]\]', process=self.parse_link)
        image_rule = Rule(
            IMAGE, r'\{\{(?!\{)(.*?)\}\}', process=self.parse_image)

        return (
            url_rule
            | wiki_link_rule
            | image_rule
            | self.nested_inline_parser
        )