def table_emit(self, node):
    """
    Emit a table node using the emitter's configured table settings.

    A new MarkupTable is stored on ``self._table``. The children are then
    emitted and their return value is discarded; presumably the row/cell
    handlers fill ``self._table`` as a side effect (confirm against the
    enclosing class). The markup is read back from ``self._table`` and
    returned with one trailing newline.
    """
    self._table = MarkupTable(
        head_prefix=self.table_head_prefix,
        auto_width=self.table_auto_width,
        debug_msg=self.debug_msg,
    )

    # Called for its side effects only - the returned text is not used.
    self.emit_children(node)

    # Read from self._table (not a local) on purpose: it is whatever table
    # object is current after the children were processed.
    return "%s\n" % self._table.get_table_markup()
def table_emit(self, node):
    """
    Emit a table node as a reStructuredText table.

    http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#tables

    A new MarkupTable (empty head prefix, auto width enabled) is stored on
    ``self._table``. The children are emitted only for their side effects;
    the table text is then fetched with ``get_rest_table()`` and returned
    followed by a blank line.
    """
    self._table = MarkupTable(
        head_prefix="",
        auto_width=True,
        debug_msg=self.debug_msg,
    )

    # The emitted child text is intentionally dropped.
    self.emit_children(node)

    return "%s\n\n" % self._table.get_rest_table()
def table_emit(self, node):
    """
    Build the markup for a table node.

    Creates a MarkupTable from ``self.table_head_prefix``,
    ``self.table_auto_width`` and ``self.debug_msg``, keeps it on
    ``self._table`` while the children are emitted (their output is
    ignored), and returns the table markup plus a newline.
    """
    table = MarkupTable(
        head_prefix=self.table_head_prefix,
        auto_width=self.table_auto_width,
        debug_msg=self.debug_msg
    )
    self._table = table

    self.emit_children(node)  # result unused; called for side effects

    # self._table is re-read here instead of using the local, so that the
    # object current after child processing is the one that gets rendered.
    markup = self._table.get_table_markup()
    return "%s\n" % markup
def table_emit(self, node):
    """
    Build the reStructuredText markup for a table node.

    http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#tables

    The MarkupTable is created with an empty head prefix and auto width
    switched on. Children are emitted for their side effects only, then the
    result of ``get_rest_table()`` is returned followed by two newlines.
    """
    table = MarkupTable(
        head_prefix="", auto_width=True, debug_msg=self.debug_msg
    )
    self._table = table

    self.emit_children(node)  # result unused; called for side effects

    # Re-read self._table rather than the local: render whichever table
    # object is current once the children have been processed.
    rest_table = self._table.get_rest_table()
    return "%s\n\n" % rest_table
class ReStructuredTextEmitter(BaseEmitter):
    """
    Build from a document_tree (html2creole.parser.HtmlParser instance)
    a reStructuredText markup text.

    Links and images can be written as references; their definitions
    (``.. _text: url`` / ``.. |text| image:: src``) are collected in
    ``self._substitution_data`` and flushed as a block between top-level
    nodes and at the end of the document.
    """
    def __init__(self, *args, **kwargs):
        super(ReStructuredTextEmitter, self).__init__(*args, **kwargs)

        self.table_head_prefix = "_. "
        self.table_auto_width = False

        # Pending definition lines, flushed by _get_block_data()
        self._substitution_data = []
        # link text -> url / image text -> src, for reuse and conflict checks
        self._used_substitution_links = {}
        self._used_substitution_images = {}
        # Marker ("*" or "#.") of the list currently being emitted
        self._list_markup = ""

    def _get_block_data(self):
        """
        Return the pending substitution block data and clear the buffer.

        e.g.:
            .. _link text: /link/url/
            .. |substitution| image:: /image.png
        """
        content = "\n".join(self._substitution_data)
        self._substitution_data = []
        return content

    #--------------------------------------------------------------------------

    def blockdata_pre_emit(self, node):
        """ pre block -> with newline at the end """
        pre_block = self.deentity.replace_all(node.content).strip()
        # NOTE(review): the indentation prefix below is reproduced as seen;
        # whitespace may have been collapsed - confirm the intended width.
        pre_block = "\n".join(
            [" %s" % line for line in pre_block.splitlines()])
        return "::\n\n%s\n\n" % pre_block

    def inlinedata_pre_emit(self, node):
        """ a pre inline block -> no newline at the end """
        return "<pre>%s</pre>" % self.deentity.replace_all(node.content)

    def blockdata_pass_emit(self, node):
        """ Pass the node content through unchanged, followed by a blank line. """
        return "%s\n\n" % node.content

    #--------------------------------------------------------------------------

    def emit_children(self, node):
        """Emit all the children of a node."""
        return "".join(self.emit_children_list(node))

    def emit(self):
        """Emit the document represented by self.root DOM tree."""
        return self.emit_node(self.root).rstrip()

    def document_emit(self, node):
        """ Emit the whole document and append still pending definitions. """
        self.last = node
        result = self.emit_children(node)
        if self._substitution_data:
            # add rest at the end
            result += "%s\n\n" % self._get_block_data()
        return result

    def emit_node(self, node):
        """
        Emit a single node; before a direct child of the root, flush the
        pending substitution definitions first.
        """
        result = ""
        if self._substitution_data and node.parent == self.root:
            result += "%s\n\n" % self._get_block_data()

        result += super(ReStructuredTextEmitter, self).emit_node(node)
        return result

    def p_emit(self, node):
        """ Paragraph: the children followed by a blank line. """
        return "%s\n\n" % self.emit_children(node)

    # level -> (adornment character, True = overline and underline)
    HEADLINE_DATA = {
        1: ("=", True),
        2: ("-", True),
        3: ("=", False),
        4: ("-", False),
        5: ('`', False),
        6: ("'", False),
    }

    def headline_emit(self, node):
        """
        Emit a section title; the adornment is as long as the title text.
        Levels above 6 are rendered like level 6.
        """
        text = self.emit_children(node)

        level = min(node.level, 6)
        char, both = self.HEADLINE_DATA[level]
        markup = char * len(text)

        if both:
            template = "%(m)s\n%(t)s\n%(m)s\n\n"
        else:
            template = "%(t)s\n%(m)s\n\n"

        return template % {"m": markup, "t": text}

    #--------------------------------------------------------------------------

    def _typeface(self, node, key):
        """ Wrap the emitted children in *key* on both sides. """
        return key + self.emit_children(node) + key

    def strong_emit(self, node):
        return self._typeface(node, key="**")
    def b_emit(self, node):
        return self._typeface(node, key="**")
    big_emit = strong_emit

    def i_emit(self, node):
        return self._typeface(node, key="*")
    def em_emit(self, node):
        return self._typeface(node, key="*")

    def tt_emit(self, node):
        return self._typeface(node, key="``")

    def small_emit(self, node):
        # FIXME: Is there no small in ReSt???
        return self.emit_children(node)

#    def sup_emit(self, node):
#        return self._typeface(node, key="^")
#    def sub_emit(self, node):
#        return self._typeface(node, key="~")
#    def del_emit(self, node):
#        return self._typeface(node, key="-")
#
#    def cite_emit(self, node):
#        return self._typeface(node, key="??")
#    def ins_emit(self, node):
#        return self._typeface(node, key="+")
#
#    def span_emit(self, node):
#        return self._typeface(node, key="%")
#    def code_emit(self, node):
#        return self._typeface(node, key="@")

    #--------------------------------------------------------------------------

    def hr_emit(self, node):
        return "----\n\n"

    def _should_do_substitution(self, node):
        """
        Return True if any ancestor of *node* (up to and including the
        root) has a kind listed in DO_SUBSTITUTION.
        """
        node = node.parent
        while True:
            if node.kind in DO_SUBSTITUTION:
                return True
            if node is self.root:
                return False
            node = node.parent

    def _get_old_substitution(self, substitution_dict, text, url):
        """
        Look up *text* in *substitution_dict*.

        Returns None when the text is new (it is stored for the next time),
        returns the stored url when it equals *url*, and raises
        Html2restException when the same text was used with another url.
        """
        if text not in substitution_dict:
            # save for the next time
            substitution_dict[text] = url
            return None

        # text has links with the same link text
        old_url = substitution_dict[text]
        if old_url == url:
            # same url -> substitution can be reused
            return old_url

        msg = (
            "Duplicate explicit target name:"
            " substitution was used more than one time, but with different URL."
            " - link text: %r url1: %r url2: %r") % (text, old_url, url)
        raise Html2restException(msg)

    def a_emit(self, node):
        """
        Emit a link. Without a href only the link text is returned.
        Otherwise either a reference (``text``_) or an inline hyperlink
        is produced.
        """
        link_text = self.emit_children(node)
        url = node.attrs.get("href", None)
        if url is None:
            return link_text

        old_url = self._get_old_substitution(self._used_substitution_links, link_text, url)

        if self._should_do_substitution(node):
            # make a hyperlink reference
            if not old_url:
                # new substitution
                self._substitution_data.append(".. _%s: %s" % (link_text, url))
            return "`%s`_" % link_text

        if old_url:
            # reuse a existing substitution
            return "`%s`_" % link_text
        else:
            # create a inline hyperlink
            return "`%s <%s>`_" % (link_text, url)

    def img_emit(self, node):
        """
        Emit an image as ``|text|`` and queue its definition once per text.
        Images with a ``data`` source scheme are dropped.
        """
        src = node.attrs["src"]

        if src.split(':')[0] == 'data':
            return ""

        title = node.attrs.get("title", "")
        alt = node.attrs.get("alt", "")
        # Use the longest one
        substitution_text = alt if len(alt) > len(title) else title

        if substitution_text == "":
            # Use filename as picture text
            substitution_text = posixpath.basename(src)

        old_src = self._get_old_substitution(self._used_substitution_images, substitution_text, src)
        if not old_src:
            self._substitution_data.append(".. |%s| image:: %s" % (substitution_text, src))

        return "|%s|" % substitution_text

    #--------------------------------------------------------------------------

    def code_emit(self, node):
        return "``%s``" % self._emit_content(node)

    #--------------------------------------------------------------------------

    def li_emit(self, node):
        """ Emit one list item, prefixed with the marker of its own list. """
        content = self.emit_children(node).strip("\n")
        # NOTE(review): indentation unit reproduced as seen; whitespace may
        # have been collapsed - confirm the intended width.
        result = "\n%s%s %s\n" % (" " * (node.level - 1), self._list_markup, content)
        return result

    def _list_emit(self, node, list_type):
        """
        Emit a list whose items are marked with *list_type*.

        The marker of the enclosing list is restored afterwards. li_emit()
        reads the marker only after its children were emitted, so without
        the restore an item containing a nested list of another type (and
        all following items) would get the nested list's marker.
        """
        outer_markup = self._list_markup
        self._list_markup = list_type
        content = self.emit_children(node)
        self._list_markup = outer_markup

        if node.level == 1:
            # FIXME: This should be made easier and better
            complete_list = "\n\n".join(
                [i.strip("\n") for i in content.split("\n") if i])
            content = "%s\n\n" % complete_list

        return content

    def ul_emit(self, node):
        return self._list_emit(node, "*")

    def ol_emit(self, node):
        return self._list_emit(node, "#.")

    def table_emit(self, node):
        """
        http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#tables
        """
        self._table = MarkupTable(head_prefix="", auto_width=True, debug_msg=self.debug_msg)
        # Called for its side effects; the returned text is not used.
        self.emit_children(node)
        content = self._table.get_rest_table()
        return "%s\n\n" % content
def test_markup_table_rest(self):
    """
    get_rest_table() for one header row and two data rows is compared
    with the expected table text via assertEqual2.
    """
    t = MarkupTable(head_prefix="")

    t.add_tr()
    for head in ("head1", "head2"):
        t.add_th(head)

    for row in (("1.1.", "1.2."), ("2.1.", "2.2.")):
        t.add_tr()
        for cell in row:
            t.add_td(cell)

    # NOTE(review): the expected literal is reproduced exactly as seen;
    # its internal whitespace may have been collapsed - confirm.
    expected = """ +-------+-------+ | head1 | head2 | +=======+=======+ | 1.1. | 1.2. | +-------+-------+ | 2.1. | 2.2. | +-------+-------+ """
    self.assertEqual2(t.get_rest_table(), expected)
def test_markup_table_textile(self):
    """
    get_table_markup() with the "_. " head prefix and auto width switched
    off is compared with the expected table text via assertEqual2.
    """
    header = ["head1", "head2"]
    body = [
        ["1.1.", "1.2."],
        ["2.1.", "2.2."],
    ]

    t = MarkupTable(head_prefix="_. ", auto_width=False)

    t.add_tr()
    for text in header:
        t.add_th(text)

    for cells in body:
        t.add_tr()
        for text in cells:
            t.add_td(text)

    markup = t.get_table_markup()
    # NOTE(review): the expected literal is reproduced exactly as seen;
    # its internal whitespace may have been collapsed - confirm.
    self.assertEqual2(
        markup,
        """ |_. head1|_. head2| |1.1.|1.2.| |2.1.|2.2.| """
    )
def test_markup_table_creole(self):
    """
    get_table_markup() with the "* " head prefix (other options left at
    their defaults) is compared with the expected table text.
    """
    t = MarkupTable(head_prefix="* ")

    # (adder, cell texts) per table row, in insertion order
    rows = (
        (t.add_th, ("head1", "head2")),
        (t.add_td, ("1.1.", "1.2.")),
        (t.add_td, ("2.1.", "2.2.")),
    )
    for add_cell, texts in rows:
        t.add_tr()
        for text in texts:
            add_cell(text)

    # NOTE(review): the expected literal is reproduced exactly as seen;
    # its internal whitespace may have been collapsed - confirm.
    expected = """ |* head1 |* head2 | | 1.1. | 1.2. | | 2.1. | 2.2. | """
    self.assertEqual2(t.get_table_markup(), expected)
class ReStructuredTextEmitter(BaseEmitter):
    """
    Build from a document_tree (html2creole.parser.HtmlParser instance)
    a reStructuredText markup text.

    Links and images can be written as references; their definitions
    (``.. _text: url`` / ``.. |text| image:: src``) are collected in
    ``self._substitution_data`` and flushed as a block between top-level
    nodes and at the end of the document.
    """
    def __init__(self, *args, **kwargs):
        super(ReStructuredTextEmitter, self).__init__(*args, **kwargs)

        self.table_head_prefix = "_. "
        self.table_auto_width = False

        # Pending definition lines, flushed by _get_block_data()
        self._substitution_data = []
        # link text -> url / image text -> src, for reuse and conflict checks
        self._used_substitution_links = {}
        self._used_substitution_images = {}
        # Marker ("*" or "#.") of the list currently being emitted
        self._list_markup = ""

    def _get_block_data(self):
        """
        Return the pending substitution block data and clear the buffer.

        e.g.:
            .. _link text: /link/url/
            .. |substitution| image:: /image.png
        """
        content = "\n".join(self._substitution_data)
        self._substitution_data = []
        return content

    #--------------------------------------------------------------------------

    def blockdata_pre_emit(self, node):
        """ pre block -> with newline at the end """
        pre_block = self.deentity.replace_all(node.content).strip()
        # NOTE(review): the indentation prefix below is reproduced as seen;
        # whitespace may have been collapsed - confirm the intended width.
        pre_block = "\n".join([" %s" % line for line in pre_block.splitlines()])
        return "::\n\n%s\n\n" % pre_block

    def inlinedata_pre_emit(self, node):
        """ a pre inline block -> no newline at the end """
        return "<pre>%s</pre>" % self.deentity.replace_all(node.content)

    def blockdata_pass_emit(self, node):
        """ Pass the node content through unchanged, followed by a blank line. """
        return "%s\n\n" % node.content

    #--------------------------------------------------------------------------

    def emit_children(self, node):
        """Emit all the children of a node."""
        return "".join(self.emit_children_list(node))

    def emit(self):
        """Emit the document represented by self.root DOM tree."""
        return self.emit_node(self.root).rstrip()

    def document_emit(self, node):
        """ Emit the whole document and append still pending definitions. """
        self.last = node
        result = self.emit_children(node)
        if self._substitution_data:
            # add rest at the end
            result += "%s\n\n" % self._get_block_data()
        return result

    def emit_node(self, node):
        """
        Emit a single node; before a direct child of the root, flush the
        pending substitution definitions first.
        """
        result = ""
        if self._substitution_data and node.parent == self.root:
            result += "%s\n\n" % self._get_block_data()

        result += super(ReStructuredTextEmitter, self).emit_node(node)
        return result

    def p_emit(self, node):
        """ Paragraph: the children followed by a blank line. """
        return "%s\n\n" % self.emit_children(node)

    # level -> (adornment character, True = overline and underline)
    HEADLINE_DATA = {
        1: ("=", True),
        2: ("-", True),
        3: ("^", True),
        4: ("\"", True),
        5: ('`', False),
        6: ("'", False),
    }

    def headline_emit(self, node):
        """
        Emit a section title; the adornment is as long as the title text.
        Levels above 6 are rendered like level 6.
        """
        text = self.emit_children(node)

        level = min(node.level, 6)
        char, both = self.HEADLINE_DATA[level]
        markup = char * len(text)

        if both:
            template = "%(m)s\n%(t)s\n%(m)s\n\n"
        else:
            template = "%(t)s\n%(m)s\n\n"

        return template % {"m": markup, "t": text}

    #--------------------------------------------------------------------------

    def _typeface(self, node, key):
        """ Wrap the emitted children in *key* on both sides. """
        return key + self.emit_children(node) + key

    def strong_emit(self, node):
        return self._typeface(node, key="**")
    def b_emit(self, node):
        return self._typeface(node, key="**")
    big_emit = strong_emit

    def i_emit(self, node):
        return self._typeface(node, key="*")
    def em_emit(self, node):
        return self._typeface(node, key="*")

    def tt_emit(self, node):
        return self._typeface(node, key="``")

    def small_emit(self, node):
        # FIXME: Is there no small in ReSt???
        return self.emit_children(node)

#    def sup_emit(self, node):
#        return self._typeface(node, key="^")
#    def sub_emit(self, node):
#        return self._typeface(node, key="~")
#    def del_emit(self, node):
#        return self._typeface(node, key="-")
#
#    def cite_emit(self, node):
#        return self._typeface(node, key="??")
#    def ins_emit(self, node):
#        return self._typeface(node, key="+")
#
#    def span_emit(self, node):
#        return self._typeface(node, key="%")
#    def code_emit(self, node):
#        return self._typeface(node, key="@")

    #--------------------------------------------------------------------------

    def hr_emit(self, node):
        return "----\n\n"

    def _should_do_substitution(self, node):
        """
        Return True if any ancestor of *node* (up to and including the
        root) has a kind listed in DO_SUBSTITUTION.
        """
        node = node.parent
        while True:
            if node.kind in DO_SUBSTITUTION:
                return True
            if node is self.root:
                return False
            node = node.parent

    def _get_old_substitution(self, substitution_dict, text, url):
        """
        Look up *text* in *substitution_dict*.

        Returns None when the text is new (it is stored for the next time),
        returns the stored url when it equals *url*, and raises
        Html2restException when the same text was used with another url.
        """
        if text not in substitution_dict:
            # save for the next time
            substitution_dict[text] = url
            return None

        # text has links with the same link text
        old_url = substitution_dict[text]
        if old_url == url:
            # same url -> substitution can be reused
            return old_url

        msg = (
            "Duplicate explicit target name:"
            " substitution was used more than one time, but with different URL."
            " - link text: %r url1: %r url2: %r"
        ) % (text, old_url, url)
        raise Html2restException(msg)

    def a_emit(self, node):
        """
        Emit an anchor.

        An anchor with an ``id`` becomes a target line (``.. _id: ``).
        An anchor without ``href`` yields only its link text instead of
        failing with a KeyError. Everything else becomes a reference
        (``text``_) or an inline hyperlink.
        """
        link_text = self.emit_children(node)
        if 'id' in node.attrs:
            return ".. _%s: " % node.attrs['id']

        url = node.attrs.get("href")
        if url is None:
            # e.g. <a name="..."> - nothing to link to
            return link_text

        old_url = self._get_old_substitution(self._used_substitution_links, link_text, url)

        if self._should_do_substitution(node):
            # make a hyperlink reference
            if not old_url:
                # new substitution
                self._substitution_data.append(
                    ".. _%s: %s" % (link_text, url)
                )
            return "`%s`_" % link_text

        if old_url:
            # reuse a existing substitution
            return "`%s`_" % link_text
        else:
            # create a inline hyperlink
            return "`%s <%s>`_" % (link_text, url)

    def img_emit(self, node):
        """
        Emit an image as ``|text|`` and queue its definition once per text,
        including ``:width:`` / ``:align:`` options when the attributes
        are set. Images with a ``data`` source scheme are dropped.
        """
        src = node.attrs["src"]

        if src.split(':')[0] == 'data':
            return ""

        title = node.attrs.get("title", "")
        alt = node.attrs.get("alt", "")
        # Use the longest one
        substitution_text = alt if len(alt) > len(title) else title

        if substitution_text == "":
            # Use filename as picture text
            substitution_text = posixpath.basename(src)

        old_src = self._get_old_substitution(
            self._used_substitution_images, substitution_text, src
        )
        if not old_src:
            width = node.attrs.get("width", "")
            align = node.attrs.get("align", "")
            image_str = ".. |%s| image:: %s" % (substitution_text, src)
            # NOTE(review): option indentation reproduced as seen;
            # whitespace may have been collapsed - confirm.
            if width:
                image_str += "\n :width: {0}".format(width)
            if align:
                image_str += "\n :align: {0}".format(align)
            self._substitution_data.append(image_str)

        return "|%s|" % substitution_text

    #--------------------------------------------------------------------------

    def code_emit(self, node):
        return "``%s``" % self._emit_content(node)

    #--------------------------------------------------------------------------

    def li_emit(self, node):
        """ Emit one list item, prefixed with the marker of its own list. """
        content = self.emit_children(node).strip("\n")
        # NOTE(review): indentation unit reproduced as seen; whitespace may
        # have been collapsed - confirm the intended width.
        result = "\n%s%s %s\n" % (
            " " * (node.level - 1), self._list_markup, content
        )
        return result

    def _list_emit(self, node, list_type):
        """
        Emit a list whose items are marked with *list_type*.

        The marker of the enclosing list is restored afterwards. li_emit()
        reads the marker only after its children were emitted, so without
        the restore an item containing a nested list of another type (and
        all following items) would get the nested list's marker.
        """
        outer_markup = self._list_markup
        self._list_markup = list_type
        content = self.emit_children(node)
        self._list_markup = outer_markup

        if node.level == 1:
            # FIXME: This should be made easier and better
            complete_list = "\n\n".join([i.strip("\n") for i in content.split("\n") if i])
            content = "%s\n\n" % complete_list

        return content

    def ul_emit(self, node):
        return self._list_emit(node, "*")

    def ol_emit(self, node):
        return self._list_emit(node, "#.")

    def table_emit(self, node):
        """
        http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#tables
        """
        self._table = MarkupTable(
            head_prefix="",
            auto_width=True,
            debug_msg=self.debug_msg
        )
        # Called for its side effects; the returned text is not used.
        self.emit_children(node)
        content = self._table.get_rest_table()
        return "%s\n\n" % content
class BaseEmitter(object):
    """
    Build from a document_tree (html2creole.parser.HtmlParser instance)
    a creole markup text.

    emit_node() dispatches every node to the method named
    ``<node.kind>_emit``; nodes without such a method are handed to the
    ``unknown_emit`` callable.
    """
    def __init__(self, document_tree, unknown_emit=None, debug=False):
        """
        :param document_tree: root node of the tree to emit
        :param unknown_emit: callable ``(emitter, node)`` used for nodes
            without a matching ``*_emit`` method; defaults to
            transparent_unknown_nodes
        :param debug: print debug messages if True
        """
        self.root = document_tree

        if unknown_emit is None:
            self._unknown_emit = transparent_unknown_nodes
        else:
            self._unknown_emit = unknown_emit

        self.last = None
        self.debugging = debug

        self.deentity = Deentity()  # for replacing html entities
        self._inner_list = ""  # markers of all currently open lists
        self._mask_linebreak = False

    #--------------------------------------------------------------------------

    def blockdata_pass_emit(self, node):
        """ Pass the node content through unchanged, followed by a blank line. """
        return "%s\n\n" % node.content

    #--------------------------------------------------------------------------

    def data_emit(self, node):
        #node.debug()
        return node.content

    def entityref_emit(self, node):
        """
        emit a named html entity
        """
        entity = node.content

        try:
            return self.deentity.replace_named(entity)
        except KeyError:
            if self.debugging:
                print("unknown html entity found: %r" % entity)
            return "&%s" % entity  # FIXME
        except UnicodeDecodeError as err:
            raise UnicodeError("Error handling entity %r: %s" % (entity, err))

    def charref_emit(self, node):
        """
        emit a not named html entity
        """
        entity = node.content

        if entity.startswith("x"):
            # entity in hex
            hex_no = entity[1:]
            return self.deentity.replace_hex(hex_no)
        else:
            # entity as a unicode number
            return self.deentity.replace_number(entity)

    #--------------------------------------------------------------------------

    def p_emit(self, node):
        return "%s\n\n" % self.emit_children(node)

    def br_emit(self, node):
        """ Line break: escaped inside a list, a plain newline elsewhere. """
        if self._inner_list != "":
            return "\\\\"
        else:
            return "\n"

    #--------------------------------------------------------------------------

    def _typeface(self, node, key):
        """ Wrap the emitted children in *key* on both sides. """
        return key + self.emit_children(node) + key

    #--------------------------------------------------------------------------

    def li_emit(self, node):
        content = self.emit_children(node)
        return "\n%s %s" % (self._inner_list, content)

    def _list_emit(self, node, list_type):
        """
        Emit a list; *list_type* is appended to the markers of the open
        lists while the children are emitted.

        The previous marker string is saved and restored instead of
        cutting off one character, so the nesting state stays correct for
        markers of any length.
        """
        start_newline = False
        if self.last and self.last.kind not in BLOCK_TAGS:
            if not self.last.content or not self.last.content.endswith("\n"):
                start_newline = True

        outer_list = self._inner_list
        self._inner_list = outer_list + list_type

        content = "%s" % self.emit_children(node)

        self._inner_list = outer_list

        if outer_list != "":
            # nested list: the outermost list does the final formatting
            return content

        # outermost list finished
        if start_newline:
            return "\n" + content + "\n\n"
        else:
            return content.strip() + "\n\n"

    #--------------------------------------------------------------------------

    def table_emit(self, node):
        """
        Emit a table. ``table_head_prefix`` and ``table_auto_width`` are not
        set in this class's __init__; presumably subclasses provide them.
        """
        self._table = MarkupTable(head_prefix=self.table_head_prefix,
                                  auto_width=self.table_auto_width,
                                  debug_msg=self.debug_msg)
        # tr/th/td handlers fill self._table; the returned text is unused
        self.emit_children(node)
        content = self._table.get_table_markup()
        return "%s\n" % content

    def tr_emit(self, node):
        self._table.add_tr()
        self.emit_children(node)
        return ""

    def _escape_linebreaks(self, text):
        """
        Strip every line, drop empty lines and join the rest with an
        escaped line break (two backslashes).
        """
        text = text.strip()
        text = text.split("\n")
        lines = [line.strip() for line in text]
        lines = [line for line in lines if line]
        content = "\\\\".join(lines)
        content = content.strip("\\")
        return content

    def th_emit(self, node):
        content = self.emit_children(node)
        content = self._escape_linebreaks(content)
        self._table.add_th(content)
        return ""

    def td_emit(self, node):
        content = self.emit_children(node)
        content = self._escape_linebreaks(content)
        self._table.add_td(content)
        return ""

    #--------------------------------------------------------------------------

    def _emit_content(self, node):
        content = self.emit_children(node)
        content = self._escape_linebreaks(content)
        if node.kind in BLOCK_TAGS:
            content = "%s\n\n" % content
        return content

    def div_emit(self, node):
        return self._emit_content(node)

    def span_emit(self, node):
        return self._emit_content(node)

    #--------------------------------------------------------------------------

    def document_emit(self, node):
        self.last = node
        return self.emit_children(node)

    def emit_children(self, node):
        """Emit all the children of a node."""
        return "".join(self.emit_children_list(node))

    def emit_children_list(self, node):
        """Emit all the children of a node and return the parts as a list."""
        self.last = node
        result = []
        for child in node.children:
            content = self.emit_node(child)
            assert isinstance(content, TEXT_TYPE)
            result.append(content)
        return result

    def emit_node(self, node):
        """
        Emit a single node.

        Raises AssertionError if the responsible emit callable does not
        return a TEXT_TYPE instance.
        """
        if node.level:
            self.debug_msg(
                "emit_node",
                "%s (level: %i): %r" % (node.kind, node.level, node.content))
        else:
            self.debug_msg("emit_node", "%s: %r" % (node.kind, node.content))

        method_name = "%s_emit" % node.kind
        emit_method = getattr(self, method_name, None)

        if emit_method:
            producer = emit_method
            content = emit_method(node)
        else:
            # the fallback is a plain callable, so self is passed explicitly
            producer = self._unknown_emit
            content = self._unknown_emit(self, node)

        if not isinstance(content, TEXT_TYPE):
            node.debug()
            raise AssertionError(
                "Method '%s' (%s) returns no unicode - returns: %s (%s)" % (
                    method_name, producer, repr(content), type(content)))

        self.last = node
        return content

#    def emit(self):
#        """Emit the document represented by self.root DOM tree."""
#        result = self.emit_node(self.root)
##        return result.strip() # FIXME
#        return result.rstrip() # FIXME

    #-------------------------------------------------------------------------

    def debug_msg(self, method, txt):
        """ Print a debug line; does nothing unless debugging is enabled. """
        if not self.debugging:
            return
        print("%13s: %s" % (method, txt))
class BaseEmitter(object):
    """
    Build from a document_tree (html2creole.parser.HtmlParser instance)
    a creole markup text.

    emit_node() dispatches every node to the method named
    ``<node.kind>_emit``; nodes without such a method are handed to the
    ``unknown_emit`` callable.
    """
    def __init__(self, document_tree, unknown_emit=None, debug=False):
        """
        :param document_tree: root node of the tree to emit
        :param unknown_emit: callable ``(emitter, node)`` used for nodes
            without a matching ``*_emit`` method; defaults to
            transparent_unknown_nodes
        :param debug: print debug messages if True
        """
        self.root = document_tree

        if unknown_emit is None:
            self._unknown_emit = transparent_unknown_nodes
        else:
            self._unknown_emit = unknown_emit

        self.last = None
        self.debugging = debug

        self.deentity = Deentity()  # for replacing html entities
        self._inner_list = ""  # markers of all currently open lists
        self._mask_linebreak = False

    #--------------------------------------------------------------------------

    def blockdata_pass_emit(self, node):
        """ Pass the node content through unchanged, followed by a blank line. """
        return "%s\n\n" % node.content

    #--------------------------------------------------------------------------

    def data_emit(self, node):
        #node.debug()
        return node.content

    def entityref_emit(self, node):
        """
        emit a named html entity
        """
        entity = node.content

        try:
            return self.deentity.replace_named(entity)
        except KeyError:
            if self.debugging:
                print("unknown html entity found: %r" % entity)
            return "&%s" % entity  # FIXME
        except UnicodeDecodeError as err:
            raise UnicodeError(
                "Error handling entity %r: %s" % (entity, err)
            )

    def charref_emit(self, node):
        """
        emit a not named html entity
        """
        entity = node.content

        if entity.startswith("x"):
            # entity in hex
            hex_no = entity[1:]
            return self.deentity.replace_hex(hex_no)
        else:
            # entity as a unicode number
            return self.deentity.replace_number(entity)

    #--------------------------------------------------------------------------

    def p_emit(self, node):
        return "%s\n\n" % self.emit_children(node)

    def br_emit(self, node):
        """ Line break: escaped inside a list, a plain newline elsewhere. """
        if self._inner_list != "":
            return "\\\\"
        else:
            return "\n"

    #--------------------------------------------------------------------------

    def _typeface(self, node, key):
        """ Wrap the emitted children in *key* on both sides. """
        return key + self.emit_children(node) + key

    #--------------------------------------------------------------------------

    def li_emit(self, node):
        content = self.emit_children(node)
        return "\n%s %s" % (self._inner_list, content)

    def _list_emit(self, node, list_type):
        """
        Emit a list; *list_type* is appended to the markers of the open
        lists while the children are emitted.

        The previous marker string is saved and restored instead of
        cutting off one character, so the nesting state stays correct for
        markers of any length.
        """
        start_newline = False
        if self.last and self.last.kind not in BLOCK_TAGS:
            if not self.last.content or not self.last.content.endswith("\n"):
                start_newline = True

        outer_list = self._inner_list
        self._inner_list = outer_list + list_type

        content = "%s" % self.emit_children(node)

        self._inner_list = outer_list

        if outer_list != "":
            # nested list: the outermost list does the final formatting
            return content

        # outermost list finished
        if start_newline:
            return "\n" + content + "\n\n"
        else:
            return content.strip() + "\n\n"

    #--------------------------------------------------------------------------

    def table_emit(self, node):
        """
        Emit a table. ``table_head_prefix`` and ``table_auto_width`` are not
        set in this class's __init__; presumably subclasses provide them.
        """
        self._table = MarkupTable(
            head_prefix=self.table_head_prefix,
            auto_width=self.table_auto_width,
            debug_msg=self.debug_msg
        )
        # tr/th/td handlers fill self._table; the returned text is unused
        self.emit_children(node)
        content = self._table.get_table_markup()
        return "%s\n" % content

    def tr_emit(self, node):
        self._table.add_tr()
        self.emit_children(node)
        return ""

    def _escape_linebreaks(self, text):
        """
        Strip every line, drop empty lines and join the rest with an
        escaped line break (two backslashes).
        """
        text = text.strip()
        text = text.split("\n")
        lines = [line.strip() for line in text]
        lines = [line for line in lines if line]
        content = "\\\\".join(lines)
        content = content.strip("\\")
        return content

    def th_emit(self, node):
        content = self.emit_children(node)
        content = self._escape_linebreaks(content)
        self._table.add_th(content)
        return ""

    def td_emit(self, node):
        content = self.emit_children(node)
        content = self._escape_linebreaks(content)
        self._table.add_td(content)
        return ""

    #--------------------------------------------------------------------------

    def _emit_content(self, node):
        content = self.emit_children(node)
        content = self._escape_linebreaks(content)
        if node.kind in BLOCK_TAGS:
            content = "%s\n\n" % content
        return content

    def div_emit(self, node):
        return self._emit_content(node)

    def span_emit(self, node):
        return self._emit_content(node)

    #--------------------------------------------------------------------------

    def document_emit(self, node):
        self.last = node
        return self.emit_children(node)

    def emit_children(self, node):
        """Emit all the children of a node."""
        return "".join(self.emit_children_list(node))

    def emit_children_list(self, node):
        """Emit all the children of a node and return the parts as a list."""
        self.last = node
        result = []
        for child in node.children:
            content = self.emit_node(child)
            assert isinstance(content, TEXT_TYPE)
            result.append(content)
        return result

    def emit_node(self, node):
        """
        Emit a single node.

        Raises AssertionError if the responsible emit callable does not
        return a TEXT_TYPE instance.
        """
        if node.level:
            self.debug_msg("emit_node", "%s (level: %i): %r" % (node.kind, node.level, node.content))
        else:
            self.debug_msg("emit_node", "%s: %r" % (node.kind, node.content))

        method_name = "%s_emit" % node.kind
        emit_method = getattr(self, method_name, None)

        if emit_method:
            producer = emit_method
            content = emit_method(node)
        else:
            # the fallback is a plain callable, so self is passed explicitly
            producer = self._unknown_emit
            content = self._unknown_emit(self, node)

        if not isinstance(content, TEXT_TYPE):
            node.debug()
            raise AssertionError(
                "Method '%s' (%s) returns no unicode - returns: %s (%s)" % (
                    method_name, producer, repr(content), type(content)
                )
            )

        self.last = node
        return content

#    def emit(self):
#        """Emit the document represented by self.root DOM tree."""
#        result = self.emit_node(self.root)
##        return result.strip() # FIXME
#        return result.rstrip() # FIXME

    #-------------------------------------------------------------------------

    def debug_msg(self, method, txt):
        """ Print a debug line; does nothing unless debugging is enabled. """
        if not self.debugging:
            return
        print("%13s: %s" % (method, txt))