Esempio n. 1
0
    def __init__(self, document_tree, unknown_emit=None, debug=False):
        self.root = document_tree

        if unknown_emit is None:
            self._unknown_emit = transparent_unknown_nodes
        else:
            self._unknown_emit = unknown_emit

        self.last = None
        self.debugging = debug

        self.deentity = Deentity()  # for replacing html entities
        self._inner_list = ""
        self._mask_linebreak = False
Esempio n. 2
0
    def __init__(self, document_tree, unknown_emit=None, debug=False):
        self.root = document_tree

        if unknown_emit is None:
            self._unknown_emit = transparent_unknown_nodes
        else:
            self._unknown_emit = unknown_emit

        self.last = None
        self.debugging = debug

        self.deentity = Deentity() # for replacing html entities
        self._inner_list = ""
        self._mask_linebreak = False
Esempio n. 3
0
class BaseEmitter(object):
    """
    Build from a document_tree (html2creole.parser.HtmlParser instance) a
    creole markup text.
    """
    def __init__(self, document_tree, unknown_emit=None, debug=False):
        self.root = document_tree

        if unknown_emit is None:
            self._unknown_emit = transparent_unknown_nodes
        else:
            self._unknown_emit = unknown_emit

        self.last = None
        self.debugging = debug

        self.deentity = Deentity()  # for replacing html entities
        self._inner_list = ""
        self._mask_linebreak = False

    #--------------------------------------------------------------------------

    def blockdata_pass_emit(self, node):
        return "%s\n\n" % node.content
        return node.content

    #--------------------------------------------------------------------------

    def data_emit(self, node):
        #node.debug()
        return node.content

    def entityref_emit(self, node):
        """
        emit a named html entity
        """
        entity = node.content

        try:
            return self.deentity.replace_named(entity)
        except KeyError as err:
            if self.debugging:
                print("unknown html entity found: %r" % entity)
            return "&%s" % entity  # FIXME
        except UnicodeDecodeError as err:
            raise UnicodeError("Error handling entity %r: %s" % (entity, err))

    def charref_emit(self, node):
        """
        emit a not named html entity
        """
        entity = node.content

        if entity.startswith("x"):
            # entity in hex
            hex_no = entity[1:]
            return self.deentity.replace_hex(hex_no)
        else:
            # entity as a unicode number
            return self.deentity.replace_number(entity)

    #--------------------------------------------------------------------------

    def p_emit(self, node):
        return "%s\n\n" % self.emit_children(node)

    def br_emit(self, node):
        if self._inner_list != "":
            return "\\\\"
        else:
            return "\n"

    #--------------------------------------------------------------------------

    def _typeface(self, node, key):
        return key + self.emit_children(node) + key

    #--------------------------------------------------------------------------

    def li_emit(self, node):
        content = self.emit_children(node)
        return "\n%s %s" % (self._inner_list, content)

    def _list_emit(self, node, list_type):
        start_newline = False
        if self.last and self.last.kind not in BLOCK_TAGS:
            if not self.last.content or not self.last.content.endswith("\n"):
                start_newline = True

        if self._inner_list == "":  # Start a new list
            self._inner_list = list_type
        else:
            self._inner_list += list_type

        content = "%s" % self.emit_children(node)

        self._inner_list = self._inner_list[:-1]

        if self._inner_list == "":  # Start a new list
            if start_newline:
                return "\n" + content + "\n\n"
            else:
                return content.strip() + "\n\n"
        else:
            return content

    #--------------------------------------------------------------------------

    def table_emit(self, node):
        self._table = MarkupTable(head_prefix=self.table_head_prefix,
                                  auto_width=self.table_auto_width,
                                  debug_msg=self.debug_msg)
        self.emit_children(node)
        content = self._table.get_table_markup()
        return "%s\n" % content

    def tr_emit(self, node):
        self._table.add_tr()
        self.emit_children(node)
        return ""

    def _escape_linebreaks(self, text):
        text = text.strip()
        text = text.split("\n")
        lines = [line.strip() for line in text]
        lines = [line for line in lines if line]
        content = "\\\\".join(lines)
        content = content.strip("\\")
        return content

    def th_emit(self, node):
        content = self.emit_children(node)
        content = self._escape_linebreaks(content)
        self._table.add_th(content)
        return ""

    def td_emit(self, node):
        content = self.emit_children(node)
        content = self._escape_linebreaks(content)
        self._table.add_td(content)
        return ""

    #--------------------------------------------------------------------------

    def _emit_content(self, node):
        content = self.emit_children(node)
        content = self._escape_linebreaks(content)
        if node.kind in BLOCK_TAGS:
            content = "%s\n\n" % content
        return content

    def div_emit(self, node):
        return self._emit_content(node)

    def span_emit(self, node):
        return self._emit_content(node)

    #--------------------------------------------------------------------------

    def document_emit(self, node):
        self.last = node
        return self.emit_children(node)

    def emit_children(self, node):
        """Emit all the children of a node."""
        return "".join(self.emit_children_list(node))

    def emit_children_list(self, node):
        """Emit all the children of a node."""
        self.last = node
        result = []
        for child in node.children:
            content = self.emit_node(child)
            assert isinstance(content, TEXT_TYPE)
            result.append(content)
        return result

    def emit_node(self, node):
        """Emit a single node."""
        def unicode_error(method_name, method, node, content):
            node.debug()
            raise AssertionError(
                "Method '%s' (%s) returns no unicode - returns: %s (%s)" %
                (method_name, method, repr(content), type(content)))

        if node.level:
            self.debug_msg(
                "emit_node",
                "%s (level: %i): %r" % (node.kind, node.level, node.content))
        else:
            self.debug_msg("emit_node", "%s: %r" % (node.kind, node.content))

        method_name = "%s_emit" % node.kind
        emit_method = getattr(self, method_name, None)

        if emit_method:
            content = emit_method(node)
            if not isinstance(content, TEXT_TYPE):
                unicode_error(method_name, emit_method, node, content)
        else:
            content = self._unknown_emit(self, node)
            if not isinstance(content, TEXT_TYPE):
                unicode_error(method_name, self._unknown_emit, node, content)

        self.last = node
        return content

#    def emit(self):
#        """Emit the document represented by self.root DOM tree."""
#        result = self.emit_node(self.root)
##        return result.strip() # FIXME
#        return result.rstrip() # FIXME

#-------------------------------------------------------------------------

    def debug_msg(self, method, txt):
        if not self.debugging:
            return
        print("%13s: %s" % (method, txt))
Esempio n. 4
0
class BaseEmitter(object):
    """
    Build from a document_tree (html2creole.parser.HtmlParser instance) a
    creole markup text.
    """
    def __init__(self, document_tree, unknown_emit=None, debug=False):
        self.root = document_tree

        if unknown_emit is None:
            self._unknown_emit = transparent_unknown_nodes
        else:
            self._unknown_emit = unknown_emit

        self.last = None
        self.debugging = debug

        self.deentity = Deentity() # for replacing html entities
        self._inner_list = ""
        self._mask_linebreak = False

    #--------------------------------------------------------------------------

    def blockdata_pass_emit(self, node):
        return "%s\n\n" % node.content
        return node.content

    #--------------------------------------------------------------------------

    def data_emit(self, node):
        #node.debug()
        return node.content

    def entityref_emit(self, node):
        """
        emit a named html entity
        """
        entity = node.content

        try:
            return self.deentity.replace_named(entity)
        except KeyError as err:
            if self.debugging:
                print("unknown html entity found: %r" % entity)
            return "&%s" % entity # FIXME
        except UnicodeDecodeError as err:
            raise UnicodeError(
                "Error handling entity %r: %s" % (entity, err)
            )

    def charref_emit(self, node):
        """
        emit a not named html entity
        """
        entity = node.content

        if entity.startswith("x"):
            # entity in hex
            hex_no = entity[1:]
            return self.deentity.replace_hex(hex_no)
        else:
            # entity as a unicode number
            return self.deentity.replace_number(entity)

    #--------------------------------------------------------------------------

    def p_emit(self, node):
        return "%s\n\n" % self.emit_children(node)

    def br_emit(self, node):
        if self._inner_list != "":
            return "\\\\"
        else:
            return "\n"

    #--------------------------------------------------------------------------

    def _typeface(self, node, key):
        return key + self.emit_children(node) + key

    #--------------------------------------------------------------------------

    def li_emit(self, node):
        content = self.emit_children(node)
        return "\n%s %s" % (self._inner_list, content)

    def _list_emit(self, node, list_type):
        start_newline = False
        if self.last and self.last.kind not in BLOCK_TAGS:
            if not self.last.content or not self.last.content.endswith("\n"):
                start_newline = True

        if self._inner_list == "": # Start a new list
            self._inner_list = list_type
        else:
            self._inner_list += list_type

        content = "%s" % self.emit_children(node)

        self._inner_list = self._inner_list[:-1]

        if self._inner_list == "": # Start a new list
            if start_newline:
                return "\n" + content + "\n\n"
            else:
                return content.strip() + "\n\n"
        else:
            return content

    #--------------------------------------------------------------------------

    def table_emit(self, node):
        self._table = MarkupTable(
            head_prefix=self.table_head_prefix,
            auto_width=self.table_auto_width,
            debug_msg=self.debug_msg
        )
        self.emit_children(node)
        content = self._table.get_table_markup()
        return "%s\n" % content

    def tr_emit(self, node):
        self._table.add_tr()
        self.emit_children(node)
        return ""

    def _escape_linebreaks(self, text):
        text = text.strip()
        text = text.split("\n")
        lines = [line.strip() for line in text]
        lines = [line for line in lines if line]
        content = "\\\\".join(lines)
        content = content.strip("\\")
        return content

    def th_emit(self, node):
        content = self.emit_children(node)
        content = self._escape_linebreaks(content)
        self._table.add_th(content)
        return ""

    def td_emit(self, node):
        content = self.emit_children(node)
        content = self._escape_linebreaks(content)
        self._table.add_td(content)
        return ""

    #--------------------------------------------------------------------------

    def _emit_content(self, node):
        content = self.emit_children(node)
        content = self._escape_linebreaks(content)
        if node.kind in BLOCK_TAGS:
            content = "%s\n\n" % content
        return content

    def div_emit(self, node):
        return self._emit_content(node)

    def span_emit(self, node):
        return self._emit_content(node)

    #--------------------------------------------------------------------------

    def document_emit(self, node):
        self.last = node
        return self.emit_children(node)

    def emit_children(self, node):
        """Emit all the children of a node."""
        return "".join(self.emit_children_list(node))

    def emit_children_list(self, node):
        """Emit all the children of a node."""
        self.last = node
        result = []
        for child in node.children:
            content = self.emit_node(child)
            assert isinstance(content, TEXT_TYPE)
            result.append(content)
        return result

    def emit_node(self, node):
        """Emit a single node."""
        def unicode_error(method_name, method, node, content):
            node.debug()
            raise AssertionError(
                "Method '%s' (%s) returns no unicode - returns: %s (%s)" % (
                    method_name, method, repr(content), type(content)
                )
            )

        if node.level:
            self.debug_msg("emit_node", "%s (level: %i): %r" % (node.kind, node.level, node.content))
        else:
            self.debug_msg("emit_node", "%s: %r" % (node.kind, node.content))

        method_name = "%s_emit" % node.kind
        emit_method = getattr(self, method_name, None)

        if emit_method:
            content = emit_method(node)
            if not isinstance(content, TEXT_TYPE):
                unicode_error(method_name, emit_method, node, content)
        else:
            content = self._unknown_emit(self, node)
            if not isinstance(content, TEXT_TYPE):
                unicode_error(method_name, self._unknown_emit, node, content)

        self.last = node
        return content

#    def emit(self):
#        """Emit the document represented by self.root DOM tree."""
#        result = self.emit_node(self.root)
##        return result.strip() # FIXME
#        return result.rstrip() # FIXME

    #-------------------------------------------------------------------------

    def debug_msg(self, method, txt):
        if not self.debugging:
            return
        print("%13s: %s" % (method, txt))