class Dumper(DumperClass):
    """Dumper that renders a parse tree as HTML markup.

    The ``dump_*`` methods each receive the tag name, its attribute dict and
    the list of already-rendered child strings, and return the strings that
    represent the element in the output.
    """

    # Opening / closing markup for the inline formatting tags
    TAGS = {
        EMPHASIS: ('<i>', '</i>'),
        STRONG: ('<b>', '</b>'),
        MARK: ('<u>', '</u>'),
        STRIKE: ('<s>', '</s>'),
        VERBATIM: ('<tt>', '</tt>'),
        TAG: ('<span class="zim-tag">', '</span>'),
        SUBSCRIPT: ('<sub>', '</sub>'),
        SUPERSCRIPT: ('<sup>', '</sup>'),
    }

    # Options that templates may set to influence whitespace handling
    TEMPLATE_OPTIONS = {
        'empty_lines': Choice('default', ('default', 'remove')),
        'line_breaks': Choice('default', ('default', 'remove')),
    }

    def dump(self, tree):
        """Reset the text-direction state and dump ``tree``."""
        # FIXME should be an init function for this
        self._isrtl = None
        return DumperClass.dump(self, tree)

    def encode_text(self, tag, text):
        """HTML-encode ``text`` and turn newlines into ``<br>`` elements.

        Newlines are left alone for verbatim / object content and when the
        ``line_breaks`` template option is ``'remove'``.
        """
        # if _isrtl is already set the direction was already
        # determined for this section
        if self._isrtl is None and not text.isspace():
            self._isrtl = self.isrtl(text)

        text = html_encode(text)
        keep_breaks = self.template_options['line_breaks'] != 'remove'
        if tag not in (VERBATIM_BLOCK, VERBATIM, OBJECT) and keep_breaks:
            text = text.replace('\n', '<br>\n')

        return text

    def text(self, text):
        """Append ``text`` to the current context, reducing empty lines.

        Whitespace-only text directly below the top-level element is
        collapsed to a single newline, optionally followed by one ``<br>``
        per extra empty line.
        """
        if self.context[-1].tag == FORMATTEDTEXT and text.isspace():
            # Reduce top level empty lines
            if self.template_options['empty_lines'] == 'remove':
                self.context[-1].text.append('\n')
            else:
                extra = text.count('\n') - 1
                if extra > 0:
                    self.context[-1].text.append(
                        '\n' + ('<br>\n' * extra) + '\n')
                elif extra == 0:
                    self.context[-1].text.append('\n')
                # whitespace without any newline is dropped
        else:
            DumperClass.text(self, text)

    def dump_h(self, tag, attrib, strings):
        """Wrap ``strings`` in an ``<hN>`` element for ``attrib['level']``."""
        h = 'h' + str(attrib['level'])
        if self._isrtl:
            start = '<' + h + ' dir=\'rtl\'>'
        else:
            start = '<' + h + '>'
        self._isrtl = None  # reset
        end = '</' + h + '>\n'
        strings.insert(0, start)
        strings.append(end)
        return strings

    def dump_block(self, tag, attrib, strings, _extra=None):
        """Wrap ``strings`` in a block element named ``tag``.

        @param tag: element name, used literally in the markup
        @param attrib: attribute dict; ``indent`` becomes left padding
        @param strings: rendered child strings, modified in place
        @param _extra: optional raw attribute text for the opening tag
        @returns: ``strings`` with opening and closing tag added
        """
        # Strip the trailing line break, the closing tag ends the line
        if strings and strings[-1].endswith('<br>\n'):
            strings[-1] = strings[-1][:-5]
        elif strings and strings[-1].endswith('\n'):
            strings[-1] = strings[-1][:-1]

        start = '<' + tag
        if self._isrtl:
            start += ' dir=\'rtl\''
        self._isrtl = None  # reset

        if 'indent' in attrib:
            level = int(attrib['indent'])
            start += ' style=\'padding-left: %ipt\'' % (30 * level)

        if _extra:
            start += ' ' + _extra
        start += '>\n'

        if tag in ('ul', 'ol'):
            end = '</' + tag + '>\n'

            if strings:
                # close last <li> element
                strings.append('</li>\n')

            if self.context[-1].tag in ('ul', 'ol'):
                # Nested list
                start = '\n' + start
        else:
            end = '\n</' + tag + '>\n'

        strings.insert(0, start)
        strings.append(end)
        return strings

    dump_p = dump_block
    dump_div = dump_block
    dump_pre = dump_block
    dump_ul = dump_block

    def dump_ol(self, tag, attrib, strings):
        """Dump a numbered list, honouring an optional ``start`` attribute.

        A letter start value selects alphabetic numbering (``type="a"`` or
        ``type="A"``) and is converted to its 1-based position.
        """
        if 'start' not in attrib:
            return self.dump_block(tag, attrib, strings)

        lower = 'abcdefghijklmnopqrstuvwxyz'
        upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        # str() so a numeric start value does not break the "in" tests
        start = str(attrib['start'])
        if start in lower:
            list_type = 'a'
            start = lower.index(start) + 1
        elif start in upper:
            list_type = 'A'
            start = upper.index(start) + 1
        else:
            list_type = '1'
        return self.dump_block(
            tag, attrib, strings,
            _extra='type="%s" start="%s"' % (list_type, start))

    def dump_li(self, tag, attrib, strings):
        """Open an ``<li>`` element, closing the previous item if any.

        The item's own ``</li>`` is emitted later, either by the next item
        or by L{dump_block()} when the list is closed.
        """
        bullet = attrib.get('bullet', BULLET)
        if self.context[-1].tag == BULLETLIST and bullet != BULLET:
            start = '<li class="%s"' % bullet
        else:
            start = '<li'

        if self._isrtl:
            start += ' dir=\'rtl\'>'
        else:
            start += '>'
        self._isrtl = None  # reset

        strings.insert(0, start)

        if self.context[-1].text:
            # we are not the first <li> element, close previous
            strings.insert(0, '</li>\n')

        return strings

    def dump_link(self, tag, attrib, strings=None):
        """Return an ``<a>`` element for ``attrib['href']``.

        The link text doubles as the ``title`` attribute, with double
        quotes escaped and any markup stripped.
        """
        href = self.linker.link(attrib['href'])
        type = link_type(attrib['href'])
        if strings:
            text = ''.join(strings)
        else:
            text = attrib['href']
        # Escape quotes so the text can not terminate the attribute value
        title = text.replace('"', '&quot;')
        title = re.sub(r'<.*?>', '', title)
        return [
            '<a href="%s" title="%s" class="%s">%s</a>'
            % (href, title, type, text)
        ]

    def dump_img(self, tag, attrib, strings=None):
        """Return an ``<img>`` element, wrapped in a link if ``href`` is set."""
        src = self.linker.img(attrib['src'])
        opt = ''
        if 'alt' in attrib:
            alt = html_encode(attrib['alt']).replace('"', '&quot;')
            opt += ' alt="%s"' % alt
        for o in ('width', 'height'):
            # Only emit sizes that are actually positive
            if o in attrib and int(float(attrib[o])) > 0:
                opt += ' %s="%s"' % (o, attrib[o])
        if 'href' in attrib:
            href = self.linker.link(attrib['href'])
            return ['<a href="%s"><img src="%s"%s></a>' % (href, src, opt)]
        else:
            return ['<img src="%s"%s>' % (src, opt)]

    def dump_object(self, tag, attrib, strings=None):
        """Dump an object, wrapping non-image objects in a ``<div>``."""
        # A fresh list per call instead of a shared mutable default
        if strings is None:
            strings = []
        strings = DumperClass.dump_object(self, tag, attrib, strings)
        if not attrib['type'].startswith('image+'):
            strings.insert(0, '<div class="zim-object">\n')
            strings.append('</div>\n')
        return strings

    def dump_object_fallback(self, tag, attrib, strings=None):
        """Fallback for unknown objects: a verbatim paragraph."""
        # Fallback to verbatim paragraph
        encoded = [html_encode(s) for s in (strings or [])]
        return ['<pre>\n'] + encoded + ['</pre>\n']

    def dump_table(self, tag, attrib, strings):
        """Wrap the rendered rows in ``<table>`` and add cell alignment.

        ``attrib['aligns']`` is a comma separated list with one alignment
        per column; the column counter restarts at every ``<tr``.
        """
        aligns = attrib['aligns'].split(',')
        tdcount = 0

        def align(index):
            # Columns without a (known) alignment get no attribute
            if index < len(aligns) \
                    and aligns[index] in ('left', 'right', 'center'):
                return ' align="' + aligns[index] + '"'
            return ''

        for i, chunk in enumerate(strings):
            if '<tr' in chunk:
                tdcount = 0
            elif '<th' in chunk:
                strings[i] = chunk.replace('<th', '<th' + align(tdcount))
                tdcount += 1
            elif '<td' in chunk:
                strings[i] = chunk.replace('<td', '<td' + align(tdcount))
                tdcount += 1

        strings.insert(0, '<table>\n')
        strings.append('</table>\n')
        return strings

    def dump_thead(self, tag, attrib, strings):
        """Wrap the header cells in ``<thead><tr>``."""
        strings.insert(0, '<thead><tr>\n')
        strings.append('</tr></thead>\n')
        return strings

    def dump_th(self, tag, attrib, strings):
        """Wrap a header cell in ``<th>``."""
        strings.insert(0, ' <th>')
        strings.append('</th>\n')
        return strings

    def dump_trow(self, tag, attrib, strings):
        """Wrap the cells of one row in ``<tr>``."""
        strings.insert(0, '<tr>\n')
        strings.append('</tr>\n')
        return strings

    def dump_td(self, tag, attrib, strings):
        """Wrap a data cell in ``<td>``."""
        if strings == [" "]:
            # NOTE(review): the original replaced [" "] with itself (a
            # no-op); a non-breaking space looks like the intent -- confirm
            strings = ["&nbsp;"]
        strings.insert(0, ' <td>')
        strings.append('</td>\n')
        return strings

    def dump_line(self, tag, attrib, strings=None):
        """Return the markup for a horizontal rule."""
        # NOTE(review): returns a str where the other dump_* methods return
        # a list -- kept as is, confirm the caller handles both
        return '\n<hr>\n'
class Dumper(DumperClass):
    """Dumper that renders a parse tree as HTML markup.

    The ``dump_*`` methods each receive the tag name, its attribute dict and
    the list of already-rendered child strings, and return the strings that
    represent the element in the output.
    """

    # Opening / closing markup for the inline formatting tags
    TAGS = {
        EMPHASIS: ('<i>', '</i>'),
        STRONG: ('<b>', '</b>'),
        MARK: ('<u>', '</u>'),
        STRIKE: ('<s>', '</s>'),
        VERBATIM: ('<tt>', '</tt>'),
        TAG: ('<span class="zim-tag">', '</span>'),
        SUBSCRIPT: ('<sub>', '</sub>'),
        SUPERSCRIPT: ('<sup>', '</sup>'),
    }

    # Options that templates may set to influence whitespace handling
    TEMPLATE_OPTIONS = {
        'empty_lines': Choice('default', ('default', 'remove')),
        'line_breaks': Choice('default', ('default', 'remove')),
    }

    def dump(self, tree):
        """Reset the text-direction state and dump ``tree``."""
        # FIXME should be an init function for this
        self._isrtl = None
        return DumperClass.dump(self, tree)

    def encode_text(self, tag, text):
        """HTML-encode ``text`` and turn newlines into ``<br>`` elements.

        Newlines are left alone for verbatim / object content and when the
        ``line_breaks`` template option is ``'remove'``.
        """
        # if _isrtl is already set the direction was already
        # determined for this section
        if self._isrtl is None and not text.isspace():
            self._isrtl = self.isrtl(text)

        text = html_encode(text)
        keep_breaks = self.template_options['line_breaks'] != 'remove'
        if tag not in (VERBATIM_BLOCK, VERBATIM, OBJECT) and keep_breaks:
            text = text.replace('\n', '<br>\n')

        return text

    def text(self, text):
        """Append ``text`` to the current context, reducing empty lines.

        Whitespace-only text directly below the top-level element is
        collapsed to a single newline, optionally followed by one ``<br>``
        per extra empty line.
        """
        if self.context[-1].tag == FORMATTEDTEXT and text.isspace():
            # Reduce top level empty lines
            if self.template_options['empty_lines'] == 'remove':
                self.context[-1].text.append('\n')
            else:
                extra = text.count('\n') - 1
                if extra > 0:
                    self.context[-1].text.append(
                        '\n' + ('<br>\n' * extra) + '\n')
                elif extra == 0:
                    self.context[-1].text.append('\n')
                # whitespace without any newline is dropped
        else:
            DumperClass.text(self, text)

    def dump_h(self, tag, attrib, strings):
        """Wrap ``strings`` in an ``<hN>`` element for ``attrib['level']``."""
        h = 'h' + str(attrib['level'])
        if self._isrtl:
            start = '<' + h + ' dir=\'rtl\'>'
        else:
            start = '<' + h + '>'
        self._isrtl = None  # reset
        end = '</' + h + '>\n'
        strings.insert(0, start)
        strings.append(end)
        return strings

    def dump_block(self, tag, attrib, strings, _extra=None):
        """Wrap ``strings`` in a block element named ``tag``.

        @param tag: element name, used literally in the markup
        @param attrib: attribute dict; ``indent`` becomes left padding
        @param strings: rendered child strings, modified in place
        @param _extra: optional raw attribute text for the opening tag
        @returns: ``strings`` with opening and closing tag added
        """
        # Strip the trailing line break, the closing tag ends the line
        if strings and strings[-1].endswith('<br>\n'):
            strings[-1] = strings[-1][:-5]
        elif strings and strings[-1].endswith('\n'):
            strings[-1] = strings[-1][:-1]

        start = '<' + tag
        if self._isrtl:
            start += ' dir=\'rtl\''
        self._isrtl = None  # reset

        if 'indent' in attrib:
            level = int(attrib['indent'])
            start += ' style=\'padding-left: %ipt\'' % (30 * level)

        if _extra:
            start += ' ' + _extra
        start += '>\n'

        if tag in ('ul', 'ol'):
            end = '</' + tag + '>\n'

            if strings:
                # close last <li> element
                strings.append('</li>\n')

            if self.context[-1].tag in ('ul', 'ol'):
                # Nested list
                start = '\n' + start
        else:
            end = '\n</' + tag + '>\n'

        strings.insert(0, start)
        strings.append(end)
        return strings

    dump_p = dump_block
    dump_div = dump_block
    dump_pre = dump_block
    dump_ul = dump_block

    def dump_ol(self, tag, attrib, strings):
        """Dump a numbered list, honouring an optional ``start`` attribute.

        A letter start value selects alphabetic numbering (``type="a"`` or
        ``type="A"``) and is converted to its 1-based position.
        """
        if 'start' not in attrib:
            return self.dump_block(tag, attrib, strings)

        # string.lowercase / string.uppercase do not exist on Python 3,
        # the ascii_* constants are available on both major versions.
        # str() so a numeric start value does not break the "in" tests.
        start = str(attrib['start'])
        if start in string.ascii_lowercase:
            list_type = 'a'
            start = string.ascii_lowercase.index(start) + 1
        elif start in string.ascii_uppercase:
            list_type = 'A'
            start = string.ascii_uppercase.index(start) + 1
        else:
            list_type = '1'
        return self.dump_block(
            tag, attrib, strings,
            _extra='type="%s" start="%s"' % (list_type, start))

    def dump_li(self, tag, attrib, strings):
        """Open an ``<li>`` element, closing the previous item if any.

        The item's own ``</li>`` is emitted later, either by the next item
        or by L{dump_block()} when the list is closed.
        """
        bullet = attrib.get('bullet', BULLET)
        if self.context[-1].tag == BULLETLIST and bullet != BULLET:
            start = '<li class="%s"' % bullet
        else:
            start = '<li'

        if self._isrtl:
            start += ' dir=\'rtl\'>'
        else:
            start += '>'
        self._isrtl = None  # reset

        strings.insert(0, start)

        if self.context[-1].text:
            # we are not the first <li> element, close previous
            strings.insert(0, '</li>\n')

        return strings

    def dump_link(self, tag, attrib, strings=None):
        """Return an ``<a>`` element for ``attrib['href']``.

        The link text doubles as the ``title`` attribute, with double
        quotes escaped.
        """
        href = self.linker.link(attrib['href'])
        type = link_type(attrib['href'])
        if strings:
            text = ''.join(strings)
        else:
            text = attrib['href']
        # Escape quotes so the text can not terminate the attribute value
        title = text.replace('"', '&quot;')
        return [
            '<a href="%s" title="%s" class="%s">%s</a>'
            % (href, title, type, text)
        ]

    def dump_img(self, tag, attrib, strings=None):
        """Return an ``<img>`` element, wrapped in a link if ``href`` is set."""
        src = self.linker.img(attrib['src'])
        opt = ''
        if 'alt' in attrib:
            alt = html_encode(attrib['alt']).replace('"', '&quot;')
            opt += ' alt="%s"' % alt
        for o in ('width', 'height'):
            # Only emit sizes that are actually positive
            if o in attrib and int(float(attrib[o])) > 0:
                opt += ' %s="%s"' % (o, attrib[o])
        if 'href' in attrib:
            href = self.linker.link(attrib['href'])
            return ['<a href="%s"><img src="%s"%s></a>' % (href, src, opt)]
        else:
            return ['<img src="%s"%s>' % (src, opt)]

    def dump_object(self, *arg, **kwarg):
        """Dump an object wrapped in a ``<div class="zim-object">``."""
        strings = DumperClass.dump_object(self, *arg, **kwarg)
        strings.insert(0, '<div class="zim-object">\n')
        strings.append('</div>\n')
        return strings

    def dump_object_fallback(self, tag, attrib, strings=None):
        """Fallback for unknown objects: a verbatim paragraph."""
        # Fallback to verbatim paragraph
        if strings is None:
            # The declared default would otherwise fail on .insert()
            strings = []
        strings.insert(0, '<pre>\n')
        strings.append('</pre>\n')
        return strings
class Dumper(TextDumper):
    """Dumper that renders a parse tree as LaTeX source.

    Requires a linker object; the sectioning commands used for headings
    depend on the ``document_type`` template option.
    """

    # LaTeX item command per bullet type
    BULLETS = {
        UNCHECKED_BOX: '\\item[\\Square]',
        XCHECKED_BOX: '\\item[\\XBox]',
        CHECKED_BOX: '\\item[\\CheckedBox]',
        MIGRATED_BOX: '\\item[\\RIGHTarrow]',
        BULLET: '\\item',
    }

    # Heading level -> sectioning command, per document type
    SECTIONING = {
        'report': {
            1: '\\chapter{%s}',
            2: '\\section{%s}',
            3: '\\subsection{%s}',
            4: '\\subsubsection{%s}',
            5: '\\paragraph{%s}'
        },
        'article': {
            1: '\\section{%s}',
            2: '\\subsection{%s}',
            3: '\\subsubsection{%s}',
            4: '\\paragraph{%s}',
            5: '\\subparagraph{%s}'
        },
        'book': {
            1: '\\part{%s}',
            2: '\\chapter{%s}',
            3: '\\section{%s}',
            4: '\\subsection{%s}',
            5: '\\subsubsection{%s}'
        }
    }

    # Opening / closing markup for the inline formatting tags
    TAGS = {
        EMPHASIS: ('\\emph{', '}'),
        STRONG: ('\\textbf{', '}'),
        MARK: ('\\uline{', '}'),
        STRIKE: ('\\sout{', '}'),
        TAG: ('', ''),  # No additional annotation (apart from the visible @)
        SUBSCRIPT: ('$_{', '}$'),
        SUPERSCRIPT: ('$^{', '}$'),
        LINE_RETURN: ('\\linebreak', ''),  # TODO
    }

    TEMPLATE_OPTIONS = {
        'document_type': Choice('report', ('report', 'article', 'book'))
    }

    def dump(self, tree):
        """Dump ``tree`` using the document type from the template options."""
        assert isinstance(tree, ParseTree)
        assert self.linker, 'LaTeX dumper needs a linker object'
        self.document_type = self.template_options['document_type']
        logger.info('used document type: %s', self.document_type)
        return TextDumper.dump(self, tree)

    @staticmethod
    def encode_text(tag, text):
        """Escape ``text`` for LaTeX; verbatim and object text is untouched."""
        if tag in (VERBATIM_BLOCK, VERBATIM, OBJECT):
            return text
        return encode_re.sub(lambda m: encode_dict[m.group(1)], text)

    def dump_pre(self, tag, attrib, strings):
        """Wrap verbatim text in an ``lstlisting`` environment."""
        indent = int(attrib.get('indent', 0))
        text = ''.join(strings)
        # remove newlines introduces by encode_text
        text = text.replace('\n\n', '\n')
        strings = text.splitlines(True)
        if indent:
            strings = self.prefix_lines(' ' * indent, strings)

        strings.insert(0, '\n\\begin{lstlisting}\n')
        strings.append('\n\\end{lstlisting}\n')
        return strings

    def dump_h(self, tag, attrib, strings):
        """Return the sectioning command for a heading.

        ``attrib['level']`` is clamped to the range 1..5.
        """
        level = min(max(int(attrib['level']), 1), 5)
        text = ''.join(strings)
        return [self.SECTIONING[self.document_type][level] % text]

    def dump_ul(self, tag, attrib, strings):
        """Wrap the list items in an ``itemize`` environment."""
        strings.insert(0, '\\begin{itemize}\n')
        strings.append('\\end{itemize}\n')
        return TextDumper.dump_ul(self, tag, attrib, strings)

    def dump_ol(self, tag, attrib, strings):
        """Wrap the list items in an ``enumerate`` environment.

        ``attrib['start']`` selects the numbering style (``a``, ``A`` or
        ``1``) and, when larger than one, the initial counter value.
        """
        # str() because the default (and possibly the attribute) is an int,
        # for which the "in" tests below would raise TypeError
        start = str(attrib.get('start', 1))
        if start in string.ascii_lowercase:
            numbering = 'a'
            start = string.ascii_lowercase.index(start) + 1
        elif start in string.ascii_uppercase:
            numbering = 'A'
            start = string.ascii_uppercase.index(start) + 1
        else:
            numbering = '1'
            start = int(start)

        strings.insert(0, '\\begin{enumerate}[%s]\n' % numbering)
        if start > 1:
            # The counter holds the number of the item *before* the first
            strings.insert(1, '\\setcounter{enumi}{%i}\n' % (start - 1))
        strings.append('\\end{enumerate}\n')

        return TextDumper.dump_ol(self, tag, attrib, strings)

    def dump_li(self, tag, attrib, strings):
        """Return a tuple with the item command followed by the item text.

        @raises AssertionError: when the item is not nested in a list
        """
        # Always return "\item" for numbered lists
        parent = self.context[-1].tag
        if parent == BULLETLIST:
            bullet = self.BULLETS.get(attrib.get('bullet'))
            if bullet is None:
                bullet = self.BULLETS[BULLET]
        elif parent == NUMBEREDLIST:
            bullet = self.BULLETS[BULLET]
        else:
            # Explicit raise, an assert statement is stripped under -O
            raise AssertionError('Unnested li element')

        return (bullet, ' ') + tuple(strings) + ('\n',)

    def is_supported_image(self, path):
        """Return True if the extension of ``path`` is a whitelisted format."""
        # Latex only supports limited image formats by default
        # Whitelist pdf, png, jpg & eps -- all else should become a link
        if '.' not in path:
            return False
        _, ext = path.rsplit('.', 1)
        return ext.lower() in ('png', 'jpg', 'jpeg', 'eps', 'pdf')

    def dump_img(self, tag, attrib, strings=None):
        """Return an ``\\includegraphics`` command for the image.

        Images in an unsupported format are dumped as a link instead.
        """
        imagepath = self.linker.img(attrib['src'])
        if not self.is_supported_image(imagepath):
            attrib.setdefault('href', attrib['src'])
            return self.dump_link(tag, attrib, strings)

        # We try to get images about the same visual size,
        # therefore need to specify dot density 96 dpi seems to be
        # common for computer monitors
        dpi = 96
        has_width = 'width' in attrib
        has_height = 'height' in attrib
        if has_width and has_height:
            options = 'height=%fin, width=%fin' % (
                float(attrib['height']) / dpi, float(attrib['width']) / dpi)
        elif has_width:
            options = 'width=%fin, keepaspectratio=true' \
                % (float(attrib['width']) / dpi)
        elif has_height:
            options = 'height=%fin, keepaspectratio=true' \
                % (float(attrib['height']) / dpi)
        else:
            options = ''

        if imagepath.startswith('file://'):
            imagepath = File(imagepath).path  # avoid URIs here
        image = '\\includegraphics[%s]{%s}' % (options, imagepath)

        if 'href' in attrib:
            href = self.linker.link(attrib['href'])
            return ['\\href{%s}{%s}' % (href, image)]
        else:
            return [image]

    def dump_link(self, tag, attrib, strings=None):
        """Return a ``\\href`` command; the URL is the text if none given."""
        href = self.linker.link(attrib['href'])
        href = url_encode(href, URL_ENCODE_READABLE)
        if strings:
            text = ''.join(strings)
        else:
            text = href
        return ['\\href{%s}{%s}' % (href, text)]

    def dump_code(self, tag, attrib, strings):
        """Return inline verbatim text as a ``\\lstinline`` command.

        @raises AssertionError: when every candidate delimiter occurs in
        the text
        """
        # Here we try several possible delimiters for the inline verb
        # command of LaTeX
        text = ''.join(strings)
        for delim in '+*|$&%!-_':
            if delim not in text:
                return ['\\lstinline' + delim + text + delim]
        # The original message referenced an undefined name here
        raise AssertionError(
            'Found no suitable delimiter for verbatim text: %s' % text)

    dump_object_fallback = dump_pre

    def dump_table(self, tag, attrib, strings):
        """Return a ``tabular`` environment for the table.

        ``strings`` is treated as a list of rows, each a list of cell
        strings; multi-line cells are wrapped in a ``\\shortstack`` in
        place.
        """
        rows = strings
        aligns, _wraps = TableParser.get_options(attrib)
        columns = {'left': 'l', 'right': 'r', 'center': 'c'}
        aligns = [columns.get(a, 'l') for a in aligns]

        def rowline(row):
            cells = '&'.join([' ' + cell + ' ' for cell in row])
            return cells + '\\tabularnewline\n\\hline'

        for i, row in enumerate(rows):
            for j, (cell, align) in enumerate(zip(row, aligns)):
                if '\n' in cell:
                    # "\\" (two backslashes) is the LaTeX line separator;
                    # a single one would start a control sequence
                    rows[i][j] = '\\shortstack[' + align + ']{' \
                        + cell.replace('\n', '\\\\') + '}'

        # print table
        table = ['\\begin{tabular}{ |' + '|'.join(aligns) + '| }']
        table.append('\\hline')
        if rows:
            # header row is followed by a double rule
            table.append(rowline(rows[0]))
            table.append('\\hline')
            table.extend(rowline(row) for row in rows[1:])
        table.append('\\end{tabular}')
        return [line + "\n" for line in table]

    def dump_line(self, tag, attrib, strings=None):
        """Return the command for a horizontal rule."""
        # NOTE(review): returns a str where the other dump_* methods return
        # a list -- kept as is, confirm the caller handles both
        return '\n\\hrule\n'
class Dumper(TextDumper):
    """Dumper that renders a parse tree as LaTeX source.

    Requires a linker object; the sectioning commands used for headings
    depend on the ``document_type`` template option.
    """

    # LaTeX item command per bullet type
    BULLETS = {
        UNCHECKED_BOX: '\\item[\\Square]',
        XCHECKED_BOX: '\\item[\\XBox]',
        CHECKED_BOX: '\\item[\\CheckedBox]',
        BULLET: '\\item',
    }

    # Heading level -> sectioning command, per document type
    SECTIONING = {
        'report': {
            1: '\\chapter{%s}',
            2: '\\section{%s}',
            3: '\\subsection{%s}',
            4: '\\subsubsection{%s}',
            5: '\\paragraph{%s}'
        },
        'article': {
            1: '\\section{%s}',
            2: '\\subsection{%s}',
            3: '\\subsubsection{%s}',
            4: '\\paragraph{%s}',
            5: '\\subparagraph{%s}'
        },
        'book': {
            1: '\\part{%s}',
            2: '\\chapter{%s}',
            3: '\\section{%s}',
            4: '\\subsection{%s}',
            5: '\\subsubsection{%s}'
        }
    }

    # Opening / closing markup for the inline formatting tags
    TAGS = {
        EMPHASIS: ('\\emph{', '}'),
        STRONG: ('\\textbf{', '}'),
        MARK: ('\\uline{', '}'),
        STRIKE: ('\\sout{', '}'),
        TAG: ('', ''),  # No additional annotation (apart from the visible @)
        SUBSCRIPT: ('$_{', '}$'),
        SUPERSCRIPT: ('$^{', '}$'),
    }

    TEMPLATE_OPTIONS = {
        'document_type': Choice('report', ('report', 'article', 'book'))
    }

    def dump(self, tree):
        """Dump ``tree`` using the document type from the template options."""
        assert isinstance(tree, ParseTree)
        assert self.linker, 'LaTeX dumper needs a linker object'
        self.document_type = self.template_options['document_type']
        logger.info('used document type: %s', self.document_type)
        return TextDumper.dump(self, tree)

    @staticmethod
    def encode_text(tag, text):
        """Escape ``text`` for LaTeX, regardless of ``tag``."""
        return encode_re.sub(lambda m: encode_dict[m.group(1)], text)

    def dump_pre(self, tag, attrib, strings):
        """Wrap verbatim text in an ``lstlisting`` environment."""
        indent = int(attrib.get('indent', 0))
        text = ''.join(strings)
        # remove newlines introduces by encode_text
        text = text.replace('\n\n', '\n')
        strings = text.splitlines(True)
        if indent:
            strings = self.prefix_lines(' ' * indent, strings)

        strings.insert(0, '\n\\begin{lstlisting}\n')
        strings.append('\n\\end{lstlisting}\n')
        return strings

    def dump_h(self, tag, attrib, strings):
        """Return the sectioning command for a heading.

        ``attrib['level']`` is clamped to the range 1..5.
        """
        level = min(max(int(attrib['level']), 1), 5)
        text = ''.join(strings)
        return [self.SECTIONING[self.document_type][level] % text]

    def dump_ul(self, tag, attrib, strings):
        """Wrap the list items in an ``itemize`` environment."""
        strings.insert(0, '\\begin{itemize}\n')
        strings.append('\\end{itemize}\n')
        return TextDumper.dump_ul(self, tag, attrib, strings)

    def dump_ol(self, tag, attrib, strings):
        """Wrap the list items in an ``enumerate`` environment.

        ``attrib['start']`` selects the numbering style (``a``, ``A`` or
        ``1``) and, when larger than one, the initial counter value.
        """
        # string.lowercase / string.uppercase do not exist on Python 3,
        # the ascii_* constants are available on both major versions.
        # str() because the default (and possibly the attribute) is an int,
        # for which the "in" tests below would raise TypeError.
        start = str(attrib.get('start', 1))
        if start in string.ascii_lowercase:
            numbering = 'a'
            start = string.ascii_lowercase.index(start) + 1
        elif start in string.ascii_uppercase:
            numbering = 'A'
            start = string.ascii_uppercase.index(start) + 1
        else:
            numbering = '1'
            start = int(start)

        strings.insert(0, '\\begin{enumerate}[%s]\n' % numbering)
        if start > 1:
            # The counter holds the number of the item *before* the first
            strings.insert(1, '\\setcounter{enumi}{%i}\n' % (start - 1))
        strings.append('\\end{enumerate}\n')

        return TextDumper.dump_ol(self, tag, attrib, strings)

    def dump_li(self, tag, attrib, strings):
        """Return a tuple with the item command followed by the item text.

        @raises AssertionError: when the item is not nested in a list
        """
        # Always return "\item" for numbered lists
        parent = self.context[-1].tag
        if parent == BULLETLIST:
            bullet = self.BULLETS.get(attrib.get('bullet'))
            if bullet is None:
                bullet = self.BULLETS[BULLET]
        elif parent == NUMBEREDLIST:
            bullet = self.BULLETS[BULLET]
        else:
            # Explicit raise, an assert statement is stripped under -O
            raise AssertionError('Unnested li element')

        return (bullet, ' ') + tuple(strings) + ('\n', )

    def dump_img(self, tag, attrib, strings=None):
        """Return the LaTeX for an image.

        For images of type ``equation`` the ``.tex`` source next to the
        image is inlined in a ``math`` environment when it can be read and
        is not empty; otherwise an ``\\includegraphics`` command is used.
        """
        # We try to get images about the same visual size,
        # therefore need to specify dot density 96 dpi seems to be
        # common for computer monitors
        dpi = 96

        if attrib.get('type') == 'equation':
            # Try to find the source, otherwise fall back to image
            src = attrib['src'][:-4] + '.tex'
            equation = None
            try:
                source = self.linker.resolve_source_file(src)
                if source is not None:
                    equation = source.read().strip()
            except FileNotFoundError:
                logger.warning('Could not find latex equation: %s', src)
            if equation:
                return ['\\begin{math}\n', equation, '\n\\end{math}']

        has_width = 'width' in attrib
        has_height = 'height' in attrib
        if has_width and has_height:
            options = 'height=%fin, width=%fin' % (
                float(attrib['height']) / dpi, float(attrib['width']) / dpi)
        elif has_width:
            options = 'width=%fin, keepaspectratio=true' \
                % (float(attrib['width']) / dpi)
        elif has_height:
            options = 'height=%fin, keepaspectratio=true' \
                % (float(attrib['height']) / dpi)
        else:
            options = ''

        imagepath = self.linker.img(attrib['src'])
        if imagepath.startswith('file://'):
            imagepath = File(imagepath).path  # avoid URIs here
        image = '\\includegraphics[%s]{%s}' % (options, imagepath)

        if 'href' in attrib:
            href = self.linker.link(attrib['href'])
            return ['\\href{%s}{%s}' % (href, image)]
        else:
            return [image]

    def dump_link(self, tag, attrib, strings=None):
        """Return a ``\\href`` command; the URL is the text if none given."""
        href = self.linker.link(attrib['href'])
        if strings:
            text = ''.join(strings)
        else:
            text = href
        return ['\\href{%s}{%s}' % (href, text)]

    def dump_code(self, tag, attrib, strings):
        """Return inline verbatim text as a ``\\lstinline`` command.

        @raises AssertionError: when every candidate delimiter occurs in
        the text
        """
        # Here we try several possible delimiters for the inline verb
        # command of LaTeX
        text = ''.join(strings)
        for delim in '+*|$&%!-_':
            if delim not in text:
                return ['\\lstinline' + delim + text + delim]
        # The original message referenced an undefined name here
        raise AssertionError(
            'Found no suitable delimiter for verbatim text: %s' % text)

    dump_object_fallback = dump_pre