Beispiel #1
0
 def dump(self, tree):
     '''Dump a ParseTree through this dumper and return the output lines.

     The linker is switched to "usebase" mode before the tree is
     walked. get_lines() is asked for a trailing newline only when
     the tree is not flagged as partial.
     '''
     assert isinstance(tree, ParseTree)
     assert self.linker, 'rst dumper needs a linker object'
     self.linker.set_usebase(True)

     buf = TextBuffer()
     root = tree.getroot()
     self.dump_children(root, buf)
     want_newline = not tree.ispartial
     return buf.get_lines(end_with_newline=want_newline)
Beispiel #2
0
 def dump(self):
     '''Dump every node of self._nodetree between an XML declaration
     plus opening <section> tag and a closing </section> tag, and
     return the result of get_lines() on the buffer.
     '''
     header = [
         u'<?xml version="1.0" encoding="utf-8"?>\n',
         u'<section>\n',
     ]
     buf = TextBuffer(header)
     for child in self._nodetree:
         buf += self._dump_node(child)
     buf.append(u'</section>\n')
     return buf.get_lines()
Beispiel #3
0
	def dump(self, tree):
		'''Dump a ParseTree through this dumper and return the output lines.

		The linker is switched to "usebase" mode first; the root node
		is handed to _dump_children() flagged as top level.
		'''
		assert isinstance(tree, ParseTree)
		assert self.linker, 'HTML dumper needs a linker object'
		self.linker.set_usebase(True)

		buf = TextBuffer()
		root = tree.getroot()
		self._dump_children(root, buf, istoplevel=True)
		return buf.get_lines()
Beispiel #4
0
	def dump(self):
		'''Dump every node of self._nodetree between an XML declaration
		plus opening <section> tag and a closing </section> tag, and
		return the result of get_lines() on the buffer.
		'''
		xml_declaration = u'<?xml version="1.0" encoding="utf-8"?>\n'
		buf = TextBuffer([xml_declaration, u'<section>\n'])
		for child in self._nodetree:
			buf += self._dump_node(child)
		buf.append(u'</section>\n')
		return buf.get_lines()
Beispiel #5
0
    def process(self, dict):
        '''Process the template tokens and return the output lines.

        @param dict: used to get / set template parameters; it is
        wrapped in a TemplateDict when it is not one already
        @returns: the result of get_lines() on the processed tokens
        '''
        params = dict if isinstance(dict, TemplateDict) else TemplateDict(dict)
        processed = self.tokens.process(params)
        return TextBuffer(processed).get_lines()
Beispiel #6
0
	def process(self, dict):
		'''Process the template tokens and return the output lines.

		@param dict: used to get / set template parameters; it is
		wrapped in a TemplateDict when it is not one already
		@returns: the result of get_lines() on the processed tokens
		'''
		if isinstance(dict, TemplateDict):
			params = dict
		else:
			params = TemplateDict(dict)
		buf = TextBuffer(self.tokens.process(params))
		return buf.get_lines()
Beispiel #7
0
    def dump(self, tree):
        '''Dump a ParseTree through this dumper and return the output lines.

        self.end_tag_pending is reset before the tree is walked. The
        levels still listed in it afterwards get their entry from
        sec_end_tag appended to the output, last one added first.
        '''
        assert isinstance(tree, ParseTree)
        assert self.linker, 'LaTeX dumper needs a linker object'
        self.linker.set_usebase(False)
        self.end_tag_pending = []

        buf = TextBuffer()
        root = tree.getroot()
        self.dump_children(root, buf)

        # The list is reversed in place and left that way on the instance
        pending = self.end_tag_pending
        pending.reverse()
        for level in pending:
            closing = sec_end_tag[level] + '\n'
            buf.append(closing)
        return buf.get_lines()
Beispiel #8
0
	def dump(self, tree):
		'''Dump a ParseTree through this dumper and return the output lines.

		Before the tree is walked self.document_type is set from the
		'document_type' template option; any value other than 'report',
		'article' or 'book' is replaced by 'report'. get_lines() is
		asked for a trailing newline only when the tree is not flagged
		as partial.
		'''
		assert isinstance(tree, ParseTree)
		assert self.linker, 'LaTeX dumper needs a linker object'
		self.linker.set_usebase(False)

		# Option set in template - potentially tainted value, so only
		# the known document types are accepted
		self.document_type = self.template_options.get('document_type')
		if self.document_type not in ('report', 'article', 'book'):
			# NOTE(review): assumes "logger" is a standard logging.Logger;
			# warning() replaces the deprecated warn() alias
			logger.warning('No document type set in template, assuming "report"')
			self.document_type = 'report' # arbitrary default
		else:
			logger.info('used document type: %s', self.document_type)

		output = TextBuffer()
		self.dump_children(tree.getroot(), output)
		return output.get_lines(end_with_newline=not tree.ispartial)
Beispiel #9
0
	def dump(self, tree):
		'''Collect the text and tail of every element yielded by
		tree.getiterator(), in that order, and return the result of
		get_lines() on the buffer. None values are skipped.
		'''
		assert isinstance(tree, ParseTree)

		buf = TextBuffer()
		for node in tree.getiterator():
			for chunk in (node.text, node.tail):
				if chunk is not None:
					buf.append(chunk)

		return buf.get_lines()
Beispiel #10
0
    def dump_children(self, list, output, list_level=-1, list_type=None, list_iter='0'):
        '''Append the text of a node and the dump of each of its
        children to the output buffer, recursing into nested elements.

        @param list: the parse tree node to dump
        @param output: buffer the generated text is added to
        @param list_level: number of tabs put in front of list item
        bullets, increased by one for every nested 'ul' / 'ol'
        @param list_type: tag of the enclosing list; items of an 'ol'
        get a numbered bullet, all others are looked up in bullet_types
        @param list_iter: number or letter used for the next 'ol' item
        '''
        if list.text:
            output.append(list.text)

        for element in list.getchildren():
            if element.tag in ('p', 'div'):
                indent = 0
                if 'indent' in element.attrib:
                    indent = int(element.attrib['indent'])
                myoutput = TextBuffer()
                self.dump_children(element, myoutput) # recurs
                output.extend('\t'*indent)
                output.extend(myoutput)

            elif element.tag == 'h':
                level = int(element.attrib['level'])
                if level < 1:   level = 1
                elif level > 4: level = 4
                char = level_tag[level-1]
                heading = element.text
                # underline of the same length as the heading text
                line = char * len(heading)
                output.append(heading + '\n')
                output.append(line)

            elif element.tag in ('ul', 'ol'):
                # NOTE: the 'indent' attribute of lists is not used here
                start = element.attrib.get('start')
                myoutput = TextBuffer()
                self.dump_children(element, myoutput, list_level=list_level+1, list_type=element.tag, list_iter=start) # recurs
                output.extend(myoutput)

            elif element.tag == 'li':
                if 'indent' in element.attrib:
                    # HACK for raw trees from pageview
                    list_level = int(element.attrib['indent'])

                if list_type == 'ol':
                    bullet = str(list_iter) + '.'
                    list_iter = increase_list_iter(list_iter) or '1' # fallback if iter not valid
                else:
                    bullet = bullet_types[element.attrib.get('bullet', BULLET)]
                output.append('\t'*list_level+bullet+' ')
                self.dump_children(element, output, list_level=list_level) # recurs
                output.append('\n')

            elif element.tag == 'pre':
                # literal block: "::" marker followed by tab indented lines
                myoutput = TextBuffer()
                myoutput.append("::\n\n")
                text = ['\t' + t for t in element.text.split('\n')]
                myoutput.append('\n'.join(text))
                output.extend(myoutput)

            elif element.tag == 'link':
                assert 'href' in element.attrib, \
                    'BUG: link %s "%s"' % (element.attrib, element.text)
                href = self.linker.link(element.attrib['href'])
                text = element.text
                output.append('`%s <%s>`_' % (text, href))

            elif element.tag in ('sub', 'sup'):
                if element.text:
                    tag = dumper_tags[element.tag]
                    # backslash is escaped explicitly, "\ " is not a
                    # valid escape sequence; the output is unchanged
                    output.append("%s`%s`\\ " % (tag, element.text))

            elif element.tag in ('mark', 'strike'):
                # only the plain text is written for these tags
                if element.text: output.append(element.text)

            elif element.tag in ('strong', 'emphasis'):
                if element.text:
                    tag = dumper_tags[element.tag]
                    msg = tag + element.text + tag + ' '
                    if output: msg = ' ' + msg
                    output.append(msg)

            elif element.tag == 'img':
                src = self.linker.img(element.attrib['src'])
                output.append('.. image:: %s' % src)

            elif element.tag in dumper_tags:
                if element.text:
                    tag = dumper_tags[element.tag]
                    output.append(' ' + tag + element.text + tag + ' ')
            else:
                assert False, 'Unknown node type: %s' % element

            if element.tail:
                output.append(element.tail)
Beispiel #11
0
	def dump_children(self, list, output, list_level = -1):
		'''Append the LaTeX for the text of a node and for each of its
		children to the output buffer, recursing into nested elements.

		Text and tail of the nodes go through tex_encode(); the text of
		'pre', 'sub' and 'sup' elements is written as is.

		@param list: the parse tree node to dump
		@param output: buffer the generated text is added to
		@param list_level: number of tabs put in front of list items,
		increased by one for every nested 'ul' / 'ol'
		'''
		if list.text:
			output.append(tex_encode(list.text))

		for element in list.getchildren():
			text = tex_encode(element.text)
			if element.tag in ('p', 'div'):
				if 'indent' in element.attrib:
					indent = int(element.attrib['indent'])
				else:
					indent = 0
				myoutput = TextBuffer()
				self.dump_children(element,myoutput)
				if indent:
					myoutput.prefix_lines('\t'*indent)
				output.extend(myoutput)
			elif element.tag == 'h':
				level = int(element.attrib['level'])
				if level < 1: level = 1
				elif level > 5: level = 5
				output.append(sectioning[self.document_type][level]%(text))
			elif element.tag == 'ul':
				output.append('\\begin{itemize}\n')
				self.dump_children(element,output,list_level=list_level+1)
				output.append('\\end{itemize}')
			elif element.tag == 'ol':
				# Normalize to a string: the membership tests below raise
				# TypeError for an int, which used to happen for every
				# list without a 'start' attribute
				start = '%s' % (element.attrib.get('start') or '1')
				if start in string.ascii_lowercase:
					numbering = 'a'
					start = string.ascii_lowercase.index(start) + 1
				elif start in string.ascii_uppercase:
					numbering = 'A'
					start = string.ascii_uppercase.index(start) + 1
				else:
					numbering = '1'
					start = int(start)
				output.append('\\begin{enumerate}[%s]\n' % numbering)
				if start > 1:
					# counter is set to the value before the first item
					output.append('\\setcounter{enumi}{%i}\n' % (start-1))
				self.dump_children(element,output,list_level=list_level+1)
				output.append('\\end{enumerate}')
			elif element.tag == 'li':
				if 'bullet' in element.attrib:
					bullet = bullet_types[element.attrib['bullet']]
				else:
					bullet = bullet_types[BULLET]
				output.append('\t'*list_level+bullet)
				self.dump_children(element, output, list_level=list_level) # recurse
				output.append('\n')
			elif element.tag == 'pre':
				indent = 0
				if 'indent' in element.attrib:
					indent = int(element.attrib['indent'])
				myoutput = TextBuffer()
				myoutput.append(element.text)
				if indent:
					myoutput.prefix_lines('    ' * indent)
				output.append('\n\\begin{lstlisting}\n')
				output.extend(myoutput)
				output.append('\n\\end{lstlisting}\n')
			elif element.tag == 'sub':
				output.append('$_{%s}$' % element.text)
			elif element.tag == 'sup':
				output.append('$^{%s}$' % element.text)
			elif element.tag == 'img':
				#we try to get images about the same visual size, therefore need to specify dot density
				#96 dpi seems to be common for computer monitors
				dpi = 96
				done = False
				if 'type' in element.attrib and element.attrib['type'] == 'equation':
					try:
						# Try to find the source, otherwise fall back to image
						src = element.attrib['src'][:-4] + '.tex'
						texfile = self.linker.resolve_file(src)
						if texfile is not None:
							equation = texfile.read().strip()
						else:
							equation = None
					except FileNotFoundError:
						logger.warning('Could not find latex equation: %s', src)
					else:
						if equation:
							output.append('\\begin{math}\n')
							output.extend(equation)
							output.append('\n\\end{math}')
							done = True

				if not done:
					# Only constrain one dimension, and only when exactly
					# one of width / height is given
					if 'width' in element.attrib and 'height' not in element.attrib:
						options = 'width=%fin, keepaspectratio=true' \
								% ( float(element.attrib['width']) / dpi )
					elif 'height' in element.attrib and 'width' not in element.attrib:
						options = 'height=%fin, keepaspectratio=true' \
								% ( float(element.attrib['height']) / dpi )
					else:
						options = ''

					imagepath = self.linker.link(element.attrib['src'])
					if imagepath.startswith('file://'):
						imagepath = File(imagepath).path # avoid URIs here
					image = '\\includegraphics[%s]{%s}' % (options, imagepath)
					if 'href' in element.attrib:
						href = self.linker.link(element.attrib['href'])
						output.append('\\href{%s}{%s}' % (href, image))
					else:
						output.append(image)
			elif element.tag == 'link':
				href = self.linker.link(element.attrib['href'])
				output.append('\\href{%s}{%s}' % (href, text))
			elif element.tag == 'emphasis':
				output.append('\\emph{'+text+'}')
			elif element.tag == 'strong':
				output.append('\\textbf{'+text+'}')
			elif element.tag == 'mark':
				output.append('\\uline{'+text+'}')
			elif element.tag == 'strike':
				output.append('\\sout{'+text+'}')
			elif element.tag == 'code':
				#Here we try several possible delimiters for the inline verb command of LaTeX
				for delim in '+*|$&%!-_':
					if delim not in text:
						output.append('\\lstinline'+delim+text+delim)
						break
				else:
					# every candidate delimiter occurs in the text
					assert False, 'Found no suitable delimiter for verbatim text: %s' % element
			elif element.tag == 'tag':
				# LaTeX doesn't have anything similar to tags afaik
				output.append(text)
			else:
				assert False, 'Unknown node type: %s' % element

			if element.tail:
				output.append(tex_encode(element.tail))
Beispiel #12
0
	def dump_children(self, list, output, list_level=-1, list_type=None, list_iter='0'):
		'''Append the text of a node and the dump of each of its
		children to the output buffer, recursing into nested elements.

		Links and images are reduced to their text, or to their href /
		src when they have no text. Tags without special handling only
		contribute their text.

		@param list: the parse tree node to dump
		@param output: buffer the generated text is added to
		@param list_level: number of tabs put in front of list item
		bullets, increased by one for every nested 'ul' / 'ol'
		@param list_type: tag of the enclosing list; items of an 'ol'
		get a numbered bullet, all others are looked up in bullet_types
		@param list_iter: number or letter used for the next 'ol' item
		'''
		if list.text:
			output.append(list.text)

		for node in list.getchildren():
			kind = node.tag
			attrib = node.attrib

			if kind in ('p', 'div'):
				depth = int(attrib.get('indent', 0))
				block = TextBuffer()
				self.dump_children(node, block) # recurs
				if depth:
					block.prefix_lines('\t'*depth)
				output.extend(block)

			elif kind == 'h':
				## Copy from Markdown
				level = min(max(int(attrib['level']), 1), 5)
				title = node.text
				if level > 2:
					# atx-style headers for deeper levels
					output.append('#' * level + ' ' + title)
				else:
					# setext-style headers for lvl 1 & 2
					underline = ('=' if level == 1 else '-') * len(title)
					output.append(title + '\n')
					output.append(underline)

			elif kind in ('ul', 'ol'):
				depth = int(attrib.get('indent', 0))
				first = attrib.get('start')
				block = TextBuffer()
				self.dump_children(node, block, list_level=list_level+1, list_type=kind, list_iter=first) # recurs
				if depth:
					block.prefix_lines('\t'*depth)
				output.extend(block)

			elif kind == 'li':
				if 'indent' in attrib:
					# HACK for raw trees from pageview
					list_level = int(attrib['indent'])

				# list_level and list_iter carry over to the following
				# siblings on purpose
				if list_type == 'ol':
					bullet = str(list_iter) + '.'
					list_iter = increase_list_iter(list_iter) or '1' # fallback if iter not valid
				else:
					bullet = bullet_types[attrib.get('bullet', BULLET)]
				output.append('\t'*list_level+bullet+' ')
				self.dump_children(node, output, list_level=list_level) # recurs
				output.append('\n')

			elif kind == 'img':
				src = attrib['src']
				# every attribute except 'src' and the "_" prefixed
				# ones ends up in a query string behind the source
				opts = ['%s=%s' % (k, v) for k, v in attrib.items()
					if k != 'src' and not k.startswith('_')]
				if opts:
					src += '?%s' % '&'.join(opts)
				output.append(node.text or src)

			elif kind == 'link':
				assert 'href' in attrib, \
					'BUG: link %s "%s"' % (attrib, node.text)
				href = attrib['href']
				output.append(node.text or href)

			elif kind == 'pre':
				depth = int(attrib.get('indent', 0))
				block = TextBuffer()
				block.append(node.text)
				if depth:
					block.prefix_lines('\t'*depth)
				output.extend(block)

			elif node.text:
				output.append(node.text)

			if node.tail:
				output.append(node.tail)
Beispiel #13
0
    def dump_children(self, list, output, list_level=-1, list_type=None, list_iter='0'):
        '''Append the markdown for the text of a node and for each of
        its children to the output buffer, recursing into nested
        elements.

        The 'indent' attribute of paragraphs, lists and verbatim blocks
        is not applied (open issue); image attributes other than 'src'
        are not written either.

        @param list: the parse tree node to dump
        @param output: buffer the generated text is added to
        @param list_level: number of tabs put in front of list item
        bullets, increased by one for every nested 'ul' / 'ol'; lists
        dumped at level -1 are surrounded by empty lines
        @param list_type: tag of the enclosing list; items of an 'ol'
        get a numbered bullet, all others are looked up in bullet_types
        @param list_iter: number or letter used for the next 'ol' item
        '''
        if list.text:
            output.append(list.text)

        for element in list.getchildren():
            if element.tag in ('p', 'div'):
                # OPEN ISSUE: no indent for para
                myoutput = TextBuffer()
                self.dump_children(element, myoutput) # recurs
                output.extend(myoutput)
            elif element.tag == 'h':
                level = int(element.attrib['level'])
                if level < 1:   level = 1
                elif level > 5: level = 5

                if level in (1, 2):
                    # setext-style headers for lvl 1 & 2
                    if level == 1: char = '='
                    else: char = '-'
                    heading = element.text
                    line = char * len(heading)
                    output.append(heading + '\n')
                    output.append(line)
                else:
                    # atx-style headers for deeper levels
                    tag = '#' * level
                    output.append(tag + ' ' + element.text)
            elif element.tag in ('ul', 'ol'):
                # OPEN ISSUE: no indent for lists
                start = element.attrib.get('start')
                myoutput = TextBuffer()
                self.dump_children(element, myoutput, list_level=list_level+1, list_type=element.tag, list_iter=start) # recurs
                if list_level == -1:
                    # Need empty lines around lists in markdown
                    output.append('\n')
                    output.extend(myoutput)
                    output.append('\n')
                else:
                    output.extend(myoutput)
            elif element.tag == 'li':
                if 'indent' in element.attrib:
                    # HACK for raw trees from pageview
                    list_level = int(element.attrib['indent'])

                if list_type == 'ol':
                    bullet = str(list_iter) + '.'
                    list_iter = increase_list_iter(list_iter) or '1' # fallback if iter not valid
                else:
                    bullet = bullet_types[element.attrib.get('bullet', BULLET)]
                output.append('\t'*list_level+bullet+' ')
                self.dump_children(element, output, list_level=list_level) # recurs
                output.append('\n')
            elif element.tag == 'pre':
                # OPEN ISSUE: no additional indent for verbatim blocks
                myoutput = TextBuffer()
                myoutput.append(element.text)
                myoutput.prefix_lines('\t') # verbatim is always indented
                output.extend(myoutput)
            elif element.tag == 'link':
                assert 'href' in element.attrib, \
                    'BUG: link %s "%s"' % (element.attrib, element.text)
                href = self.linker.link(element.attrib['href'])
                text = element.text or href
                if href == text and url_re.match(href):
                    # bare URL, use the short "<url>" form
                    output.append('<' + href + '>')
                else:
                    output.append('[%s](%s)' % (text, href))
            elif element.tag == 'img':
                src = self.linker.img(element.attrib['src'])
                # OPEN ISSUE: image properties used in zim not supported in pandoc
                text = element.text or ''
                output.append('![%s](%s)' % (text, src))
            elif element.tag in dumper_tags:
                if element.text:
                    tag = dumper_tags[element.tag]
                    output.append(tag + element.text + tag)
            else:
                assert False, 'Unknown node type: %s' % element

            if element.tail:
                output.append(element.tail)
Beispiel #14
0
    def dump_children(self, list, output, list_level=-1):
        '''Append the LaTeX for the text of a node and for each of its
        children to the output buffer, recursing into nested elements.

        Text and tail of the nodes go through tex_encode(); the text of
        'pre', 'sub' and 'sup' elements is written as is. Heading
        levels 4 and 5 are recorded in self.end_tag_pending so that
        their entry from sec_end_tag can be written later on.

        @param list: the parse tree node to dump
        @param output: buffer the generated text is added to
        @param list_level: number of tabs put in front of list items,
        increased by one for every nested 'ul' / 'ol'; images dumped
        at level -1 are wrapped in a center environment
        '''
        if list.text:
            output.append(tex_encode(list.text))

        for element in list.getchildren():
            text = tex_encode(element.text)

            if element.tag in ('p', 'div'):
                if 'indent' in element.attrib:
                    indent = int(element.attrib['indent'])
                else:
                    indent = 0
                myoutput = TextBuffer()
                self.dump_children(element, myoutput)
                if indent:
                    myoutput.prefix_lines('\t' * indent)
                output.extend(myoutput)

            elif element.tag == 'h':
                level = int(element.attrib['level'])
                if level < 1: level = 1
                elif level > 5: level = 5

                if self.end_tag_pending:
                    # a level 5 heading directly following another one
                    # closes its predecessor
                    if self.end_tag_pending[-1] == level and level == 5:
                        output.append(sec_end_tag[level] + '\n')
                        self.end_tag_pending.pop()

                    # level 4 and above close everything still pending,
                    # last one added first
                    if level <= 4:
                        self.end_tag_pending.reverse()
                        for et in self.end_tag_pending:
                            output.append(sec_end_tag[et] + '\n')
                        self.end_tag_pending = []

                if level >= 4:
                    if level == 4: output.append('%' + '_' * 78 + '\n')
                    self.end_tag_pending.append(level)

                output.append(sectioning[level] % (text))

            elif element.tag == 'ul':
                output.append('\\begin{itemize}\n')
                self.dump_children(element, output, list_level=list_level + 1)
                output.append('\\end{itemize}')

            elif element.tag == 'ol':
                # Normalize to a string: the membership tests below raise
                # TypeError for an int, which used to happen for every
                # list without a 'start' attribute
                start = '%s' % (element.attrib.get('start') or '1')
                if start in string.ascii_lowercase:
                    numbering = 'a'
                    start = string.ascii_lowercase.index(start) + 1
                elif start in string.ascii_uppercase:
                    numbering = 'A'
                    start = string.ascii_uppercase.index(start) + 1
                else:
                    numbering = '1'
                    start = int(start)
                output.append('\\begin{enumerate}[%s]\n' % numbering)
                if start > 1:
                    # counter is set to the value before the first item
                    output.append('\\setcounter{enumi}{%i}\n' % (start - 1))
                self.dump_children(element, output, list_level=list_level + 1)
                output.append('\\end{enumerate}')

            elif element.tag == 'li':
                if 'bullet' in element.attrib:
                    bullet = bullet_types[element.attrib['bullet']]
                else:
                    bullet = bullet_types[BULLET]
                output.append('\t' * list_level + bullet)
                self.dump_children(element, output,
                                   list_level=list_level)  # recurse
                output.append('\n')

            elif element.tag == 'pre':
                indent = 0
                if 'indent' in element.attrib:
                    indent = int(element.attrib['indent'])
                myoutput = TextBuffer()
                myoutput.append(element.text)
                if indent:
                    myoutput.prefix_lines('    ' * indent)
                output.append('\n\\begin{lstlisting}\n')
                output.extend(myoutput)
                output.append('\n\\end{lstlisting}\n')

            elif element.tag == 'sub':
                output.append('$_{%s}$' % element.text)

            elif element.tag == 'sup':
                output.append('$^{%s}$' % element.text)

            elif element.tag == 'img':
                if list_level == -1: output.append('\\begin{center}\n')

                done = False
                if 'type' in element.attrib and element.attrib[
                        'type'] == 'equation':
                    try:
                        # Try to find the source, otherwise fall back to image
                        src = element.attrib['src'][:-4] + '.tex'
                        texfile = self.linker.resolve_file(src)
                        if texfile is not None:
                            equation = texfile.read().strip()
                        else:
                            equation = None
                    except FileNotFoundError:
                        logger.warning('Could not find latex equation: %s', src)
                    else:
                        if equation:
                            output.append('\\begin{math}\n')
                            output.extend(equation)
                            output.append('\n\\end{math}')
                            done = True

                if not done:
                    imagepath = File(self.linker.link(
                        element.attrib['src'])).path

                    # Scale by the dominant dimension: images wider than
                    # high are set to 0.9 of the text width, all others
                    # to 0.8 of the text height. The sizes are compared
                    # directly because dividing two ints truncates on
                    # Python 2, e.g. 3 / 2 == 1.
                    img_width, img_height = Image.open(imagepath).size
                    if img_width > img_height:
                        options = 'width=0.90\\textwidth'
                    else:
                        options = 'height=0.80\\textheight'

                    image = '\\includegraphics[%s]{%s}' % (options, imagepath)
                    if 'href' in element.attrib:
                        href = self.linker.link(element.attrib['href'])
                        output.append('\\href{%s}{%s}' % (href, image))
                    else:
                        output.append(image)

                if list_level == -1: output.append('\n\\end{center}\n')

            elif element.tag == 'link':
                href = self.linker.link(element.attrib['href'])
                output.append('\\href{%s}{%s}' % (href, text))

            elif element.tag == 'emphasis':
                output.append('\\emph{' + text + '}')

            elif element.tag == 'strong':
                output.append('\\textbf{' + text + '}')

            elif element.tag == 'mark':
                output.append('\\uline{' + text + '}')

            elif element.tag == 'strike':
                output.append('\\sout{' + text + '}')

            elif element.tag == 'code':
                #Here we try several possible delimiters for the inline verb command of LaTeX
                for delim in '+*|$&%!-_':
                    if delim not in text:
                        output.append('\\lstinline' + delim + text + delim)
                        break
                else:
                    # every candidate delimiter occurs in the text
                    assert False, 'Found no suitable delimiter for verbatim text: %s' % element

            elif element.tag == 'tag':
                # LaTeX doesn't have anything similar to tags afaik
                output.append(text)

            else:
                assert False, 'Unknown node type: %s' % element

            if element.tail:
                output.append(tex_encode(element.tail))
Beispiel #15
0
	def dump_children(self, list, output, list_level=-1):
		'''Append the wiki text for the text of a node and for each of
		its children to the output buffer, recursing into nested
		elements.

		@param list: the parse tree node to dump
		@param output: buffer the generated text is added to
		@param list_level: number of tabs put in front of list item
		bullets, increased by one for every nested 'ul'
		'''
		if list.text:
			output.append(list.text)

		for element in list.getchildren():
			if element.tag == 'p':
				indent = 0
				if 'indent' in element.attrib:
					indent = int(element.attrib['indent'])
				myoutput = TextBuffer()
				self.dump_children(element, myoutput) # recurs
				if indent:
					myoutput.prefix_lines('\t'*indent)
				output.extend(myoutput)
			elif element.tag == 'ul':
				self.dump_children(element, output, list_level=list_level+1) # recurs
			elif element.tag == 'h':
				level = int(element.attrib['level'])
				if level < 1:   level = 1
				elif level > 5: level = 5
				# level 1 gets six "=" characters, level 5 gets two
				tag = '='*(7 - level)
				output.append(tag+' '+element.text+' '+tag)
			elif element.tag == 'li':
				if 'indent' in element.attrib:
					list_level = int(element.attrib['indent'])
				if 'bullet' in element.attrib:
					bullet = bullet_types[element.attrib['bullet']]
				else:
					bullet = '*'
				output.append('\t'*list_level+bullet+' ')
				self.dump_children(element, output, list_level=list_level) # recurs
				output.append('\n')
			elif element.tag == 'pre':
				output.append("'''\n"+element.text+"'''\n")
			elif element.tag == 'img':
				src = element.attrib['src']
				opts = []
				# sorted to make the output independent of dict ordering
				for k, v in sorted(element.attrib.items()):
					if k == 'src' or k.startswith('_'):
						continue
					elif v: # skip None, "" and 0
						opts.append('%s=%s' % (k, v))
				if opts:
					src += '?%s' % '&'.join(opts)
				if element.text:
					output.append('{{'+src+'|'+element.text+'}}')
				else:
					output.append('{{'+src+'}}')
			elif element.tag == 'link':
				assert 'href' in element.attrib, \
					'BUG: link %s "%s"' % (element.attrib, element.text)
				href = element.attrib['href']
				if href == element.text:
					if url_re.match(href):
						output.append(href)
					else:
						output.append('[['+href+']]')
				elif element.text:
					output.append('[['+href+'|'+element.text+']]')
				else:
					# no link text, concatenating None would raise
					output.append('[['+href+']]')
			elif element.tag in dumper_tags:
				# skip empty formatting tags, concatenating None would raise
				if element.text:
					tag = dumper_tags[element.tag]
					output.append(tag+element.text+tag)
			else:
				assert False, 'Unknown node type: %s' % element

			if element.tail:
				output.append(element.tail)
Beispiel #16
0
    def dump_children(self, list, output, list_level=-1, list_type=None, list_iter='0'):
        '''Append the wiki text for the text of a node and for each of
        its children to the output buffer, recursing into nested
        elements.

        @param list: the parse tree node to dump
        @param output: buffer the generated text is added to
        @param list_level: number of tabs put in front of list item
        bullets, increased by one for every nested 'ul' / 'ol'
        @param list_type: tag of the enclosing list; items of an 'ol'
        get a numbered bullet
        @param list_iter: number or letter used for the next 'ol' item
        '''
        if list.text:
            output.append(list.text)

        for element in list.getchildren():
            if element.tag in ('p', 'div'):
                indent = 0
                if 'indent' in element.attrib:
                    indent = int(element.attrib['indent'])
                myoutput = TextBuffer()
                self.dump_children(element, myoutput) # recurs
                if indent:
                    myoutput.prefix_lines('\t'*indent)
                output.extend(myoutput)
            elif element.tag == 'h':
                level = int(element.attrib['level'])
                if level < 1:   level = 1
                elif level > 5: level = 5
                # level 1 gets six "=" characters, level 5 gets two
                tag = '='*(7 - level)
                output.append(tag+' '+element.text+' '+tag)
            elif element.tag in ('ul', 'ol'):
                indent = int(element.attrib.get('indent', 0))
                start = element.attrib.get('start')
                myoutput = TextBuffer()
                self.dump_children(element, myoutput, list_level=list_level+1, list_type=element.tag, list_iter=start) # recurs
                if indent:
                    myoutput.prefix_lines('\t'*indent)
                output.extend(myoutput)
            elif element.tag == 'li':
                if 'indent' in element.attrib:
                    # HACK for raw trees from pageview
                    list_level = int(element.attrib['indent'])
                if list_type == 'ol':
                    bullet = str(list_iter) + '.'
                    list_iter = increase_list_iter(list_iter) or '1' # fallback if iter not valid
                elif 'bullet' in element.attrib: # ul OR raw tree from pageview...
                    if element.attrib['bullet'] in bullet_types:
                        bullet = bullet_types[element.attrib['bullet']]
                    else:
                        bullet = element.attrib['bullet'] # Assume it is numbered..
                else: # ul
                    bullet = '*'
                output.append('\t'*list_level+bullet+' ')
                self.dump_children(element, output, list_level=list_level) # recurs
                output.append('\n')
            elif element.tag == 'pre':
                indent = 0
                if 'indent' in element.attrib:
                    indent = int(element.attrib['indent'])
                myoutput = TextBuffer()
                myoutput.append("'''\n"+element.text+"'''\n")
                if indent:
                    myoutput.prefix_lines('\t'*indent)
                output.extend(myoutput)
            elif element.tag == 'img':
                src = element.attrib['src']
                opts = []
                # we sort params only because unit tests don't like random output;
                # sorted() also works where items() does not return a list
                for k, v in sorted(element.attrib.items()):
                    if k == 'src' or k.startswith('_'):
                        continue
                    elif v: # skip None, "" and 0
                        opts.append('%s=%s' % (k, v))
                if opts:
                    src += '?%s' % '&'.join(opts)

                if element.text:
                    output.append('{{'+src+'|'+element.text+'}}')
                else:
                    output.append('{{'+src+'}}')

            elif element.tag == 'sub':
                output.append("_{%s}" % element.text)
            elif element.tag == 'sup':
                output.append("^{%s}" % element.text)
            elif element.tag == 'link':
                assert 'href' in element.attrib, \
                    'BUG: link %s "%s"' % (element.attrib, element.text)
                href = element.attrib['href']
                if href == element.text:
                    # bare URLs are written as is, other targets in brackets
                    if url_re.match(href):
                        output.append(href)
                    else:
                        output.append('[['+href+']]')
                else:
                    if element.text:
                        output.append('[['+href+'|'+element.text+']]')
                    else:
                        output.append('[['+href+']]')

            elif element.tag in dumper_tags:
                if element.text:
                    tag = dumper_tags[element.tag]
                    output.append(tag+element.text+tag)
            else:
                assert False, 'Unknown node type: %s' % element

            if element.tail:
                output.append(element.tail)
Beispiel #17
0
 def dump(self, tree):
     '''Dump a ParseTree through dump_children() and return the
     output lines.

     get_lines() is asked for a trailing newline only when the tree
     is not flagged as partial.
     '''
     assert isinstance(tree, ParseTree)
     buf = TextBuffer()
     root = tree.getroot()
     self.dump_children(root, buf)
     want_newline = not tree.ispartial
     return buf.get_lines(end_with_newline=want_newline)