Example #1
0
    def process_html(cls, text: str, environment: TypographEnvironment = None, encoding: str = 'utf-8',
                     autolink: bool = True) -> str:
        """
        Parse an HTML string, run the typograph over the tree and serialize it back.

        :param text: HTML text to process
        :param environment: typograph environment; when ``None``, the value
            returned by ``get_default_environment()`` is used
        :param encoding: text encoding used both to serialize the tree and to
            decode the serialized result
        :param autolink: when true, ``clean.autolink`` is applied to the tree
            with ``cls.AUTOLINK_REGEX`` to turn in-text URLs into links
        :return: the serialized markup with every occurrence of
            ``cls.VERBATIM`` replaced by ``&``
        """
        active_env = get_default_environment() if environment is None else environment

        root = html.fromstring(text)
        cls._process_node(root, active_env)

        if autolink:
            # avoid_hosts is passed explicitly as an empty list.
            clean.autolink(root, [cls.AUTOLINK_REGEX], avoid_hosts=[])

        serialized = html.tostring(root, encoding=encoding)
        markup = serialized.decode(encoding)

        # Swap the verbatim marker back to a literal ampersand as the last step.
        return markup.replace(cls.VERBATIM, '&')
Example #2
0
    def _to_python(self, value, state):
        """
        Sanitize an HTML string and return the serialized, cleaned fragment.

        Line breaks in ``value`` are converted to ``<br>`` tags, the text is
        parsed as a fragment wrapped in a ``div``, every attribute outside the
        allow-list is removed, and the tree is then run through lxml's
        ``Cleaner``, ``autolink`` and ``word_break`` before serialization.

        :param value: raw HTML text to sanitize
        :param state: validation state, forwarded to ``self.message`` and
            ``formencode.Invalid``
        :return: the result of ``lxml.etree.tostring`` for the cleaned fragment
        :raises formencode.Invalid: with the ``invalidHTML`` message when any
            step of the processing raises an ``Exception``
        """
        try:
            # NOTE(review): the imports live inside the ``try``, so a missing
            # lxml installation is also reported as ``invalidHTML``.
            from lxml.html.clean import Cleaner, autolink, word_break
            from lxml.html import fragment_fromstring
            from lxml.etree import tostring

            allowed_attributes = frozenset([
                'href', 'target', 'rel', 'name', 'title',
                'src', 'width', 'height', 'alt',
            ])
            cleaner = Cleaner(
                style=True,
                add_nofollow=True,
                allow_tags=['div', 'p', 'br', 'a', 'strong', 'b', 'blockquote',
                            'em', 'i', 'img', 'u', 's', 'del'],
                remove_unknown_tags=False,
            )

            # Normalize all newline flavours to <br>; "\r\n" goes first so a
            # Windows line ending yields one tag rather than two.
            markup = value.replace("\r\n", "<br>").replace("\n", "<br>").replace("\r", "<br>")
            fragment = fragment_fromstring(markup, create_parent='div')

            for element in fragment.xpath('//*[@*]'):
                # Iterate over a snapshot of the names so that deleting an
                # attribute never happens while the mapping is being iterated.
                for attr_name in list(element.attrib.keys()):
                    if attr_name not in allowed_attributes:
                        del element.attrib[attr_name]

            cleaner(fragment)
            autolink(fragment)
            word_break(fragment)
            return tostring(fragment)
        except Exception:
            # Deliberately not a bare ``except:``: KeyboardInterrupt and
            # SystemExit must propagate instead of becoming a validation error.
            raise formencode.Invalid(self.message('invalidHTML', state), value, state)
Example #3
0
def fix_links(doc):
    """Autolink ``doc`` and mark every hyperlink to open in a new window.

    Calls ``autolink(doc)`` first, then sets ``target="_blank"`` on each
    ``<a>`` element carrying an ``href`` attribute, as selected by the
    ``//a[@href]`` XPath query.

    :param doc: tree to process; it must provide an ``xpath`` method
    :return: the same ``doc`` object that was passed in
    """
    autolink(doc)
    anchors = doc.xpath('//a[@href]')
    for anchor in anchors:
        anchor.attrib['target'] = '_blank'
    return doc
Example #4
0
def autolink(doc):
    """Replace http:// strings (texts) with HTML links.

    Thin wrapper that calls ``clean.autolink`` on ``doc`` and hands the same
    object back, so the call can be used in a chain. The return value of
    ``clean.autolink`` is ignored; whatever it changes is changed on ``doc``
    itself.

    See http://lxml.de/lxmlhtml.html, section autolink.

    :param doc: document passed to ``clean.autolink`` (presumably an lxml
        HTML element; confirm against callers)
    :return: the ``doc`` object that was passed in
    """
    clean.autolink(doc)
    return doc
Example #5
0
def autolink(doc):
    """Replace http:// strings (texts) with HTML links.

    Thin wrapper that calls ``clean.autolink`` on ``doc`` and hands the same
    object back, so the call can be used in a chain. The return value of
    ``clean.autolink`` is ignored; whatever it changes is changed on ``doc``
    itself.

    See http://lxml.de/lxmlhtml.html, section autolink.

    :param doc: document passed to ``clean.autolink`` (presumably an lxml
        HTML element; confirm against callers)
    :return: the ``doc`` object that was passed in
    """
    clean.autolink(doc)
    return doc