Example #1
0
 def _image_repl(self, groups):
     """Create an ``image`` node for an image or attachment in the page.

     The node content is the stripped ``image_target`` group.  A ``text``
     child is attached that holds the stripped ``image_text`` group, or the
     image node's own content when that text is missing or empty.
     """
     src = groups.get('image_target', "").strip()
     caption = groups.get('image_text', "")
     caption = caption.strip() if caption else ""
     image = DocNode("image", self.cur, src)
     DocNode('text', image, caption or image.content)
     # Drop the reference to the current inline text node.
     self.text = None
Example #2
0
 def _item_repl(self, groups):
     """Add a list item, opening a new list node when none fits.

     ``item_head`` is the bullet prefix: a trailing ``#`` selects a
     ``number_list``, anything else a ``bullet_list``; its length minus one
     is the list level.  ``item_text`` is parsed as inline markup.
     """
     head = groups.get('item_head', "")
     body = groups.get('item_text', "")
     kind = 'number_list' if head[-1] == '#' else 'bullet_list'
     level = len(head) - 1

     # Walk up the tree until a list of this level or a block container
     # is reached (or the tree is exhausted).
     found = self.cur
     while found:
         if (found.kind in ('number_list', 'bullet_list')
                 and found.level == level):
             break
         if found.kind in ('document', 'section', 'blockquote'):
             break
         found = found.parent

     if found and found.kind == kind:
         # Reuse the existing list of the same kind.
         self.cur = found
     else:
         # Otherwise start a new list level.
         self.cur = self._upto(
             self.cur, ('list_item', 'document', 'section', 'blockquote'))
         new_list = DocNode(kind, self.cur)
         new_list.level = level
         self.cur = new_list

     item = DocNode('list_item', self.cur)
     item.level = level + 1
     self.cur = item
     self.parse_inline(body)
     self.text = None
Example #3
0
    def handle_starttag(self, tag, attrs):
        """Create a document node for an opening tag.

        Tags listed in ``IGNORE_TAGS`` are skipped.  Tags matching
        ``headline_tag_re`` become ``headline`` nodes whose level is the
        first regex group.  ``ul``/``ol`` increase the list nesting counter,
        and list tags record that counter as their level.
        """
        self.debug_msg("starttag", f"{tag!r} atts: {attrs}")

        if tag in IGNORE_TAGS:
            return

        match = headline_tag_re.match(tag)
        if match:
            self.cur = DocNode("headline",
                               self.cur,
                               level=int(match.group(1)))
            return

        if tag in ("img", "br"):
            # Workaround for img/br tags not written as start-end tags:
            # <img src="/image.jpg"> never gets a closing </img>, so the
            # node is added without becoming the current node.
            DocNode(tag, self.cur, None, attrs)
            return

        if tag in ("ul", "ol"):
            self.__list_level += 1
            self.cur = DocNode(tag,
                               self.cur,
                               None,
                               attrs,
                               level=self.__list_level)
        elif tag == "li":
            self.cur = DocNode(tag,
                               self.cur,
                               None,
                               attrs,
                               level=self.__list_level)
        else:
            self.cur = DocNode(tag, self.cur, None, attrs)
Example #4
0
    def _add_macro(self,
                   groups,
                   macro_type,
                   name_key,
                   args_key,
                   text_key=None):
        """
        Shared handler for every macro variant (inline, inline-tag, block).

        Creates a ``macro_type`` node whose content is the stripped
        ``text_key`` group (``None`` when no ``text_key`` is given), stores
        the macro name and stripped arguments on it, and records the name
        in ``self.root.used_macros``.
        """
        assert macro_type in ("macro_inline", "macro_block")

        body = groups.get(text_key, "").strip() if text_key else None

        macro = DocNode(macro_type, self.cur, body)
        name = groups[name_key]
        macro.macro_name = name
        self.root.used_macros.add(name)
        macro.macro_args = groups.get(args_key, "").strip()

        self.text = None
Example #5
0
 def _pre_block_repl(self, groups):
     """Add a ``pre_block`` node built from the ``pre_block_text`` group.

     Every ``pre_escape_re`` match in the text is replaced by its
     ``indent`` and ``rest`` groups.  The ``pre_block_kind`` group (or an
     empty string) is stored on the node as ``sect``.
     """
     self._upto_block()
     section = groups.get('pre_block_kind', None)
     raw = groups.get('pre_block_text', "")
     unescaped = self.pre_escape_re.sub(
         lambda m: m.group('indent') + m.group('rest'), raw)
     block = DocNode('pre_block', self.cur, unescaped)
     block.sect = section if section else ''
     self.text = None
Example #6
0
    def _pre_block_repl(self, groups):
        """Create a ``pre_block`` node from the matched groups.

        The ``pre_block_text`` group is run through ``pre_escape_re``,
        keeping only the ``indent`` and ``rest`` parts of each match.
        ``node.sect`` holds the ``pre_block_kind`` group, or ``""``.
        """

        def _keep_indent_and_rest(match):
            # Replacement callback for pre_escape_re.
            return match.group("indent") + match.group("rest")

        self._upto_block()
        sect = groups.get("pre_block_kind", None)
        content = self.pre_escape_re.sub(
            _keep_indent_and_rest, groups.get("pre_block_text", ""))
        pre = DocNode("pre_block", self.cur, content)
        if sect:
            pre.sect = sect
        else:
            pre.sect = ""
        self.text = None
Example #7
0
    def _pre_block_repl(self, groups):
        """Handle a preformatted block.

        Moves up to block level, rewrites each ``pre_escape_re`` match in
        the ``pre_block_text`` group to ``indent + rest``, and adds the
        result as a ``pre_block`` node.  The node's ``sect`` attribute is
        the ``pre_block_kind`` group, or ``''`` when it is missing or empty.
        """
        self._upto_block()
        block_kind = groups.get('pre_block_kind', None)
        block_text = groups.get('pre_block_text', "")

        def strip_escape(found):
            pieces = (found.group('indent'), found.group('rest'))
            return pieces[0] + pieces[1]

        block_text = self.pre_escape_re.sub(strip_escape, block_text)
        pre_node = DocNode('pre_block', self.cur, block_text)
        pre_node.sect = block_kind or ''
        self.text = None
Example #8
0
 def handle_startendtag(self, tag, attrs):
     """Create a node for a self-closing tag.

     Placeholder tags (block or inline) become ``<tag>_<type>`` nodes whose
     content is looked up in ``self.blockdata`` by the tag's ``id``
     attribute.  Any other tag becomes a plain node carrying ``attrs``.
     """
     self.debug_msg("startendtag", "%r atts: %s" % (tag, attrs))
     attr_dict = dict(attrs)
     if tag not in (self._block_placeholder, self._inline_placeholder):
         DocNode(tag, self.cur, None, attrs)
         return

     data_index = int(attr_dict["id"])
     node_kind = "%s_%s" % (tag, attr_dict["type"])
     DocNode(node_kind, self.cur, content=self.blockdata[data_index])
Example #9
0
 def handle_startendtag(self, tag, attrs):
     """Handle a tag written in start-end form.

     For the block/inline placeholder tags, the ``id`` attribute indexes
     ``self.blockdata`` and the node kind is ``<tag>_<type>``.  All other
     tags are added as ordinary nodes with their attributes.
     """
     self.debug_msg("startendtag", f"{tag!r} atts: {attrs}")
     attr_dict = dict(attrs)
     placeholders = (self._block_placeholder, self._inline_placeholder)
     if tag in placeholders:
         position = int(attr_dict["id"])
         DocNode(
             f"{tag}_{attr_dict['type']}",
             self.cur,
             content=self.blockdata[position],
         )
     else:
         DocNode(tag, self.cur, None, attrs)
Example #10
0
 def _url_repl(self, groups):
     """Turn a raw URL into a link node, or into plain text when escaped."""
     if groups.get('escaped_url'):
         # Escaped URL: append it to the current text node, creating one
         # first when there is none.
         if self.text is None:
             self.text = DocNode('text', self.cur, "")
         self.text.content += groups.get('url_target')
         return

     # Unescaped URL: a link whose text child repeats the link content.
     url = groups.get('url_target', "")
     link = DocNode('link', self.cur)
     link.content = url
     DocNode('text', link, link.content)
     self.text = None
Example #11
0
 def _url_repl(self, groups):
     """Handle a bare URL found in the text.

     An escaped URL is appended to the running text node.  Otherwise a
     ``link`` node is created with the URL as content and a ``text`` child
     showing the same content.
     """
     is_escaped = groups.get('escaped_url')
     if is_escaped:
         # Render the escaped url as ordinary text.
         if self.text is None:
             self.text = DocNode('text', self.cur, "")
         self.text.content += groups.get('url_target')
     else:
         link_node = DocNode('link', self.cur)
         link_node.content = groups.get('url_target', "")
         DocNode('text', link_node, link_node.content)
         self.text = None
    def __init__(self,
                 raw,
                 block_rules=None,
                 blog_line_breaks=True,
                 debug=False):
        """Set up the parser state for the markup string ``raw``.

        When ``block_rules`` is not given, a ``BlockRules`` instance is
        built with the ``blog_line_breaks`` setting.  The block rules are
        joined with ``|`` and compiled into ``self.block_re``.
        """
        assert isinstance(raw, str)
        self.raw = raw

        rules = block_rules
        if rules is None:
            rules = BlockRules(blog_line_breaks=blog_line_breaks)

        self.blog_line_breaks = blog_line_breaks
        self.debug = debug  # TODO: use logging

        # One alternation pattern covering all block element rules.
        pattern = '|'.join(rules.rules)
        self.block_re = re.compile(pattern, rules.re_flags)

        document = DocNode('document', None)
        self.root = document
        self.cur = document  # The most recent document node
        self.text = None  # The node to add inline characters to
        self.last_text_break = None  # Last break node, inserted by _text_repl()

        # Collects the names of all macros used in the text
        document.used_macros = set()
Example #13
0
    def _inline_mark(self, groups, key):
        """Wrap the ``<key>_text`` group in a new node of kind ``key``.

        The group text is parsed as inline markup inside the new node.
        Afterwards the current node becomes the parent of the nearest
        ``key`` node found by ``_upto``.
        """
        self.cur = DocNode(key, self.cur)
        self.text = None

        inner = groups["%s_text" % key]
        self.parse_inline(inner)

        enclosing = self._upto(self.cur, (key, ))
        self.cur = enclosing.parent
        self.text = None
Example #14
0
    def _add_macro(self, groups, macro_type, name_key, args_key, text_key=None):
        """
        Generic method to handle a macro; used for all variants:
        inline, inline-tag, block.

        The node content is the stripped ``text_key`` group, or ``None``
        when ``text_key`` is not given.  The macro name and the stripped
        arguments are stored on the node.
        """
        assert macro_type in ("macro_inline", "macro_block")

        content = None
        if text_key:
            content = groups.get(text_key, "").strip()

        macro_node = DocNode(macro_type, self.cur, content)
        macro_node.macro_name = groups[name_key]
        macro_node.macro_args = groups.get(args_key, "").strip()

        self.text = None
Example #15
0
 def _link_repl(self, groups):
     """Create a ``link`` node and parse the link text inside it.

     The node content is the ``link_target`` group.  The stripped
     ``link_text`` group is run through ``link_re`` with ``self._replace``
     as the callback; the substitution result itself is discarded.
     """
     href = groups.get('link_target', "")
     label = groups.get('link_text', "")
     label = label.strip() if label else ""

     outer = self.cur
     link = DocNode('link', outer)
     self.cur = link
     link.content = href
     self.text = None
     re.sub(self.link_re, self._replace, label)

     # Restore the node that was current before the link.
     self.cur = outer
     self.text = None
Example #16
0
    def _table_repl(self, groups):
        """Add one table row, creating the table node when needed.

        Each ``cell_re`` match in the row becomes a ``table_cell`` when its
        ``cell`` group is non-empty, otherwise a ``table_head`` built from
        the ``head`` group with ``=`` and spaces stripped from both ends.
        """
        row_text = groups.get('table', '|').strip()
        self.cur = self._upto(self.cur,
                              ('table', 'document', 'section', 'blockquote'))
        if self.cur.kind != 'table':
            self.cur = DocNode('table', self.cur)
        table = self.cur
        table_row = DocNode('table_row', table)

        for match in self.cell_re.finditer(row_text):
            cell_text = match.group('cell')
            if not cell_text:
                content = match.group('head').strip('= ')
                self.cur = DocNode('table_head', table_row)
                self.text = DocNode('text', self.cur, "")
            else:
                content = cell_text.strip()
                self.cur = DocNode('table_cell', table_row)
                self.text = None
            self.parse_inline(content)

        # Return to the table node after the row has been filled.
        self.cur = table
        self.text = None
Example #17
0
    def _text_repl(self, groups):
        """Add a line of text, opening a paragraph when needed.

        A truthy ``space`` group prefixes the text with one space.  A truthy
        ``break`` group adds a ``break`` node, stored in
        ``self.last_text_break``, when the current node is a paragraph,
        emphasis, strong or pre_inline node.
        """
        table_or_list = ('table', 'table_row', 'bullet_list', 'number_list')
        if self.cur.kind in table_or_list:
            self._upto_block()

        containers = ('document', 'section', 'blockquote')
        if self.cur.kind in containers:
            self.cur = DocNode('paragraph', self.cur)

        line = groups.get('text', "")
        if groups.get('space'):
            # Wikipedia-style line breaks: separate the new line from the
            # previous one with a single space.
            line = " " + line

        self.parse_inline(line)

        breakable = ('paragraph', 'emphasis', 'strong', 'pre_inline')
        if groups.get('break') and self.cur.kind in breakable:
            self.last_text_break = DocNode('break', self.cur, "")

        self.text = None
Example #18
0
    def __init__(self, debug=False):
        """Initialise the parser state.

        With ``debug`` enabled a warning is issued and ``self.result`` is a
        ``DebugList``; otherwise it is a plain list.
        """
        super().__init__(convert_charrefs=False)

        self.debugging = debug
        if not self.debugging:
            self.result = []
        else:
            warnings.warn(
                message="Html2Creole debug is on! warn every data append.")
            self.result = DebugList(self)

        self.blockdata = []

        document = DocNode("document", None)
        self.root = document
        self.cur = document

        # Nesting counter, increased for each opened ul/ol tag.
        self.__list_level = 0
Example #19
0
 def _head_repl(self, groups):
     """Add a ``header`` node.

     Its content is the stripped ``head_text`` group and its level is the
     length of the ``head_head`` group.
     """
     self._upto_block()
     header = DocNode("header", self.cur, groups["head_text"].strip())
     marker = groups["head_head"]
     header.level = len(marker)
     self.text = None
Example #20
0
 def handle_entityref(self, name):
     """Add an ``entityref`` node whose content is the entity name."""
     self.debug_msg("entityref", f"{name!r}")
     parent = self.cur
     DocNode("entityref", parent, content=name)
Example #21
0
 def handle_data(self, data):
     """Add a ``data`` node holding the given string."""
     self.debug_msg("data", f"{data!r}")
     assert isinstance(data, str)
     parent = self.cur
     DocNode("data", parent, content=data)
Example #22
0
 def _char_repl(self, groups):
     """Append the ``char`` group to the current text node.

     An empty ``text`` node is created first when there is none.
     """
     text_node = self.text
     if text_node is None:
         text_node = DocNode('text', self.cur, "")
         self.text = text_node
     text_node.content += groups.get('char', "")
Example #23
0
 def _linebreak_repl(self, groups):
     """Add a ``break`` node with no content under the current node."""
     parent = self.cur
     DocNode('break', parent, None)
     self.text = None
Example #24
0
 def _pre_inline_repl(self, groups):
     """Add a ``pre_inline`` node holding the ``pre_inline_text`` group."""
     DocNode('pre_inline', self.cur, groups.get('pre_inline_text', ""))
     self.text = None
Example #25
0
 def _line_repl(self, groups):
     """Carry a newline of the original markup over as a ``line`` node."""
     self._upto_block()
     parent = self.cur
     DocNode('line', parent, "")
Example #26
0
 def handle_entityref(self, name):
     """Record an entity reference as an ``entityref`` node."""
     debug_text = "%r" % name
     self.debug_msg("entityref", debug_text)
     DocNode("entityref", self.cur, content=name)
Example #27
0
 def _head_repl(self, groups):
     """Create a ``header`` node from the ``head_text`` group.

     The node level equals the number of characters in ``head_head``.
     """
     self._upto_block()
     heading = DocNode('header', self.cur, groups['head_text'].strip())
     depth = len(groups['head_head'])
     heading.level = depth
     self.text = None
Example #28
0
 def _separator_repl(self, groups):
     """Add a ``separator`` node at block level."""
     self._upto_block()
     parent = self.cur
     DocNode('separator', parent)
Example #29
0
 def handle_data(self, data):
     """Add a ``data`` node for the given text.

     Values of type ``BINARY_TYPE`` are passed through ``unicode()`` first.
     """
     self.debug_msg("data", "%r" % data)
     content = unicode(data) if isinstance(data, BINARY_TYPE) else data
     DocNode("data", self.cur, content=content)
Example #30
0
 def _head_repl(self, groups):
     """Handle a heading.

     Moves up to block level, then adds a ``header`` node with the
     stripped ``head_text`` as content and ``len(head_head)`` as level.
     """
     self._upto_block()
     title = groups['head_text'].strip()
     node = DocNode('header', self.cur, title)
     node.level = len(groups['head_head'])
     self.text = None