Ejemplo n.º 1
    def make_node(self, cls, element):
        """Build a ``cls`` node from ``element``'s text, children and tails.

        ``element`` is read through ``.text``, iteration over its children,
        and each child's ``.tail`` (an ElementTree-style interface --
        NOTE(review): confirm the concrete type against the caller).

        Placement rules:

        * Text containing a stashed-HTML placeholder becomes a raw HTML node.
        * When ``cls`` is listed in ``HAVING_BLOCK_NODE``, plain text and
          inline children are wrapped in a paragraph instead of being
          attached directly to the new node.
        * Otherwise text is attached directly as a ``Text`` node.

        Returns the populated node.
        """
        node = cls()
        having_block_node = cls in HAVING_BLOCK_NODE
        if element.text and element.text != "\n":
            text = self.unescape_char(element.text)
            if HTML_PLACEHOLDER_RE.search(text):
                node += nodes.raw(format='html',
                                  text=self.unescape_char(text, rawHtml=True))
            elif having_block_node:
                node += nodes.paragraph(text=text)
            else:
                node += nodes.Text(text)

        # Tracks whether the most recent child was folded into the leading
        # paragraph, so that its tail text can follow it there.  Initialised
        # up front so it is always bound when the tail branch reads it.
        all_nodes_is_in_paragraph = False
        for child in element:
            subnode = self.visit(child)
            if having_block_node and isinstance(subnode, INLINE_NODES):
                all_nodes_is_in_paragraph = True
                # Explicit len() check kept on purpose.
                # NOTE(review): docutils nodes may not follow container
                # truthiness, so do not rewrite as ``if not node``.
                if len(node) == 0:
                    node += nodes.paragraph()
                node[0] += subnode
            else:
                all_nodes_is_in_paragraph = False
                node += subnode

            if child.tail and child.tail != "\n":
                tail = self.unescape_char(child.tail)
                if HTML_PLACEHOLDER_RE.search(tail):
                    node += nodes.raw(format='html', text=tail)
                elif all_nodes_is_in_paragraph:
                    node[0] += nodes.Text(tail)
                elif having_block_node:
                    node += nodes.paragraph(text=tail)
                else:
                    node += nodes.Text(tail)

        return node
Ejemplo n.º 2
    def make_node(self, cls, element):
        """Create a ``cls`` node populated from ``element``.

        The element's leading text, each child (converted with
        ``self.visit``) and each child's tail text are appended in document
        order.  Whitespace-only ``"\\n"`` text and tails are skipped.

        * Text matching ``HTML_PLACEHOLDER_RE`` is emitted as a raw HTML node.
        * If ``cls`` is in ``HAVING_BLOCK_NODE``, plain text and inline
          children are placed inside a paragraph rather than directly under
          the node.
        * Otherwise plain text is appended as a ``Text`` node.

        Returns the new node.
        """
        node = cls()
        having_block_node = cls in HAVING_BLOCK_NODE
        if element.text and element.text != "\n":
            text = self.unescape_char(element.text)
            if HTML_PLACEHOLDER_RE.search(text):
                node += nodes.raw(format='html', text=self.unescape_char(text, rawHtml=True))
            elif having_block_node:
                node += nodes.paragraph(text=text)
            else:
                node += nodes.Text(text)

        # True while the last visited child was merged into node[0]; bound
        # before the loop so the tail handling never reads an unset name.
        all_nodes_is_in_paragraph = False
        for child in element:
            subnode = self.visit(child)
            if having_block_node and isinstance(subnode, INLINE_NODES):
                all_nodes_is_in_paragraph = True
                # Keep the explicit length test.
                # NOTE(review): docutils nodes may be truthy even when
                # empty -- confirm before simplifying.
                if len(node) == 0:
                    node += nodes.paragraph()
                node[0] += subnode
            else:
                all_nodes_is_in_paragraph = False
                node += subnode

            if child.tail and child.tail != "\n":
                tail = self.unescape_char(child.tail)
                if HTML_PLACEHOLDER_RE.search(tail):
                    node += nodes.raw(format='html', text=tail)
                elif all_nodes_is_in_paragraph:
                    # Tail text continues the paragraph holding the child.
                    node[0] += nodes.Text(tail)
                elif having_block_node:
                    node += nodes.paragraph(text=tail)
                else:
                    node += nodes.Text(tail)

        return node
Ejemplo n.º 3
    def make_node(self, cls, element):
        """Create a ``cls`` node from ``element``, with ``<!--math-->`` support.

        The element's leading text, each child (converted with
        ``self.visit``) and each child's tail are appended in order;
        ``"\\n"``-only text is skipped.

        * Text matching ``HTML_PLACEHOLDER_RE`` is expanded to its stashed
          HTML.  If that HTML starts with ``<!--math`` and matches
          ``<!--math ... -->``, the stripped comment body becomes a math
          node; other stashed HTML becomes a raw HTML node.
        * If ``cls`` is in ``HAVING_BLOCK_NODE``, plain text and inline
          children are wrapped in a paragraph.
        * Otherwise plain text is appended as a ``Text`` node.

        Returns the new node.
        """
        node = cls()
        having_block_node = cls in HAVING_BLOCK_NODE
        if element.text and element.text != "\n":
            text = self.unescape_char(element.text)
            if HTML_PLACEHOLDER_RE.search(text):
                html_text = self.unescape_char(text, rawHtml=True)
                if html_text.startswith("<!--math"):
                    g = re.match(r"<!--math(.*?)-->", html_text, re.DOTALL)
                    if g:
                        # NOTE(review): the original call was truncated after
                        # ``text=...,``; any further keyword arguments it
                        # passed could not be recovered -- confirm upstream.
                        node += nodes.math(text=g.group(1).strip())
                    # An unterminated ``<!--math`` comment is deliberately
                    # not emitted as raw HTML.
                else:
                    node += nodes.raw(format='html', text=html_text)
            elif having_block_node:
                node += nodes.paragraph(text=text)
            else:
                node += nodes.Text(text)

        # True while the last visited child was merged into node[0]; bound
        # before the loop so the tail handling never reads an unset name.
        all_nodes_is_in_paragraph = False
        for child in element:
            subnode = self.visit(child)
            if having_block_node and isinstance(subnode, INLINE_NODES):
                all_nodes_is_in_paragraph = True
                # Explicit len() check kept.
                # NOTE(review): docutils nodes may be truthy even when
                # empty -- confirm before simplifying.
                if len(node) == 0:
                    node += nodes.paragraph()
                node[0] += subnode
            else:
                all_nodes_is_in_paragraph = False
                node += subnode

            if child.tail and child.tail != "\n":
                tail = self.unescape_char(child.tail)
                if HTML_PLACEHOLDER_RE.search(tail):
                    node += nodes.raw(format='html', text=tail)
                elif all_nodes_is_in_paragraph:
                    node[0] += nodes.Text(tail)
                elif having_block_node:
                    node += nodes.paragraph(text=tail)
                else:
                    node += nodes.Text(tail)

        return node
def stashedHTML2text(text, md):
    """Replace stashed-HTML placeholders in ``text`` with plain text.

    Each match of ``HTML_PLACEHOLDER_RE`` is looked up by index in
    ``md.htmlStash.rawHtmlBlocks`` (entries are ``(raw, safe)`` pairs) and
    replaced by the raw HTML with tags and entities stripped.

    * A placeholder whose lookup fails is left in the text unchanged.
    * When ``md.safeMode`` is set and the block is not marked safe, the
      placeholder is replaced by an empty string.

    Returns the resulting string.
    """
    def _html_sub(m):
        """Return the plain-text replacement for one placeholder match."""
        try:
            raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
        except (IndexError, TypeError):
            # Unknown stash index: keep the placeholder as it was.
            return m.group(0)
        if md.safeMode and not safe:
            return ''
        # Strip out tags and entities - leaving text
        return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)

    return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
Ejemplo n.º 5
def stashedHTML2text(text, md):
    """Swap each stashed-HTML placeholder in ``text`` for its plain text.

    The placeholder's captured group is used as an integer index into
    ``md.htmlStash.rawHtmlBlocks``, whose entries unpack as ``(raw, safe)``.
    Tags (``<...>``) and entities (``&...;``) are removed from ``raw`` and
    the remainder is substituted for the placeholder.

    * If the index lookup raises ``IndexError`` or ``TypeError``, the
      placeholder is kept verbatim.
    * If ``md.safeMode`` is truthy and the block is not safe, the
      placeholder is removed.

    Returns the substituted string.
    """
    def _html_sub(m):
        """Compute the replacement text for a single placeholder match."""
        try:
            raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
        except (IndexError, TypeError):
            # No such stashed block: leave the placeholder untouched.
            return m.group(0)
        if md.safeMode and not safe:
            return ''
        # Strip out tags and entities - leaving text
        return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)

    return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
Ejemplo n.º 6
    def unescape_char(self, text, rawHtml=False):
        """Undo character escapes and selectively expand HTML placeholders.

        Two substitutions are applied to ``text`` in order:

        1. Every ``\\x02NN\\x03`` sequence (two decimal digits between the
           control characters 0x02 and 0x03) is replaced by ``chr(NN)``.
        2. Every ``HTML_PLACEHOLDER_RE`` match is looked up in
           ``self.markdown.htmlStash.rawHtmlBlocks``.  The stashed HTML is
           substituted when ``rawHtml`` is true or when the HTML begins with
           an entity-like ``&...;`` token; otherwise the placeholder is left
           unchanged.

        Returns the processed string.
        """
        def unescape(matched):
            return chr(int(matched.group(1)))

        def expand_rawhtml(matched):
            html_id = int(matched.group(1))
            # Entries unpack as (html, safe); the safe flag is not used here.
            html, _safe = self.markdown.htmlStash.rawHtmlBlocks[html_id]
            if rawHtml or re.match(r'(&[\#a-zA-Z0-9]*;)', html):
                return html  # unescape HTML entities only
            # Keep the placeholder; returning None would make re.sub drop it.
            return matched.group(0)

        # Raw string: ``re`` itself interprets \x02 / \x03, and this avoids
        # the invalid ``\d`` escape of a non-raw literal.
        text = re.sub(r'\x02(\d\d)\x03', unescape, text)
        text = HTML_PLACEHOLDER_RE.sub(expand_rawhtml, text)
        return text
Ejemplo n.º 7
def stashedHTML2text(text, md, strip_entities=True):
    """Replace stashed-HTML placeholders in ``text`` with plain text.

    Each ``HTML_PLACEHOLDER_RE`` match is resolved by integer index in
    ``md.htmlStash.rawHtmlBlocks``.  Tags (``<...>``) are always removed
    from the stashed HTML; entities (``&...;``) are removed as well when
    ``strip_entities`` is true (the default).

    A placeholder whose lookup raises ``IndexError`` or ``TypeError`` is
    left in the text unchanged.

    Returns the substituted string.
    """
    def _html_sub(m):
        """Return the plain-text replacement for one placeholder match."""
        try:
            raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
        except (IndexError, TypeError):  # pragma: no cover
            return m.group(0)
        # Strip out tags and/or entities - leaving text
        res = re.sub(r'(<[^>]+>)', '', raw)
        if strip_entities:
            res = re.sub(r'(&[\#a-zA-Z0-9]+;)', '', res)
        return res

    return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
Ejemplo n.º 8
    def unescape_char(self, text, rawHtml=False):
        """Restore escaped characters and expand eligible HTML placeholders.

        First, each ``\\x02NN\\x03`` sequence (``NN`` being two decimal
        digits) is turned back into the character ``chr(NN)``.  Then each
        ``HTML_PLACEHOLDER_RE`` match is resolved against
        ``self.markdown.htmlStash.rawHtmlBlocks``:

        * with ``rawHtml=True`` the stashed HTML always replaces the
          placeholder;
        * with ``rawHtml=False`` it does so only if the stashed HTML starts
          with an entity-like ``&...;`` token, and any other placeholder is
          kept as is.

        Returns the processed string.
        """
        def unescape(matched):
            return chr(int(matched.group(1)))

        def expand_rawhtml(matched):
            html_id = int(matched.group(1))
            # Stash entries unpack as (html, safe); only the HTML is needed.
            html, _safe = self.markdown.htmlStash.rawHtmlBlocks[html_id]
            if rawHtml or re.match(r'(&[\#a-zA-Z0-9]*;)', html):
                return html  # unescape HTML entities only
            # Fall through to the unchanged placeholder rather than None,
            # which re.sub would treat as an empty replacement.
            return matched.group(0)

        # Raw pattern: the regex engine handles \x02 / \x03 itself, and the
        # literal no longer contains the invalid string escape ``\d``.
        text = re.sub(r'\x02(\d\d)\x03', unescape, text)
        text = HTML_PLACEHOLDER_RE.sub(expand_rawhtml, text)
        return text