Esempio n. 1
0
def is_valid_html(html):
    """Report whether ``html`` parses without error under a strict HTML parser.

    The text is encoded to UTF-8 and passed to ``fragments_fromstring``
    together with an ``HTMLParser(strict=True)`` instance.

    Args:
        html: Text to check; it must provide ``.encode('utf-8')``.

    Returns:
        ``False`` when parsing raises ``ParseError``, ``True`` otherwise.
        Any other exception propagates to the caller.
    """
    try:
        payload = html.encode('utf-8')
        strict_parser = HTMLParser(strict=True)
        # Only the absence of a ParseError matters; the parsed fragments
        # themselves are discarded.
        fragments_fromstring(payload, parser=strict_parser)
    except ParseError:
        is_valid = False
    else:
        is_valid = True
    return is_valid
Esempio n. 2
0
    def _validate_htmloutput(self, htmloutput, field, value):
        """Check that ``value`` is a well-formed HTML template.

        The value is UTF-8 encoded and parsed with ``fragments_fromstring``
        using ``HTMLParser(strict=True)``.  If the rule constraint is a dict
        with a truthy ``'template_vars_required'`` entry, the template must
        also contain at least one ``$name`` placeholder.

        NOTE(review): the ``_validate_<rule>`` name and the ``self._error``
        calls look like a Cerberus-style custom rule -- confirm against the
        enclosing class.

        Args:
            htmloutput: Constraint configured for this rule; only inspected
                when it is a ``dict``.
            field: Name of the field being validated, forwarded to
                ``self._error``.
            value: The HTML template text; must provide ``.encode('utf-8')``.

        Returns:
            Whatever ``self._error`` returns when a check fails, otherwise
            ``None``.
        """
        try:
            fragments_fromstring(value.encode('utf-8'),
                                 parser=HTMLParser(strict=True))
        except ParseError as e:
            return self._error(
                field, 'The provided HTML template is not valid: {}'.format(e))

        placeholders_required = (
            isinstance(htmloutput, dict)
            and htmloutput.get('template_vars_required')
        )
        # Raw string: in a plain literal ``\$`` and ``\w`` are invalid string
        # escapes, which newer Python versions warn about at compile time.
        # The pattern handed to ``re`` is unchanged.
        if placeholders_required and not re.search(r'\$(\w+)', value):
            return self._error(
                field,
                "The provided HTML template is not valid: no vars available."
            )
Esempio n. 3
0
 def make_one(self, **kwargs):
     """Return a new ``lxml.html.html5parser.HTMLParser`` built from ``kwargs``.

     All keyword arguments are forwarded unchanged to the constructor.
     """
     # The import runs when the method is called, not when the module loads.
     from lxml.html.html5parser import HTMLParser as parser_factory
     parser = parser_factory(**kwargs)
     return parser
Esempio n. 4
0
import keyword  # stdlib; authoritative keyword list for the running interpreter

# Constants

# ASCII-only identifier shape: a letter or underscore, then letters, digits or
# underscores.
# NOTE(review): ``$`` also matches just before a trailing newline, so
# ``"name\n"`` is accepted by ``.match()``; confirm whether callers rely on it.
VALID_NON_UNICODE_IDENTIFIER_RE = re.compile(r'^[_a-zA-Z][_a-zA-Z0-9]*$')

# Reserved words that must not be used as identifiers.  This used to be a table
# copied from the Python 3.3 docs
# (https://docs.python.org/3.3/reference/lexical_analysis.html#keywords), which
# lacks ``async`` and ``await`` (reserved since Python 3.7).  ``keyword.kwlist``
# contains every entry of that table and tracks the running interpreter.
PY_KEYWORDS = set(keyword.kwlist)

# Module-level HTML5 parser instance.  ``tree=TreeBuilder`` is intentionally
# not passed: lxml.html.html5parser.HTMLParser.__init__ takes care of it.
_html5Parser = HTMLParser(
    debug=False,                  # same as the default
    namespaceHTMLElements=False,  # differs from the default
    strict=False,                 # same as the default
)

_xmlParser = XMLParser(
    encoding='utf-8',
    remove_blank_text=True,
    huge_tree=True,
    recover=True,   # ATTENTION: recover=True should *never* be needed at this point, but html5lib is broken in it's namespace-support (reading namespaced stuff correctly).  Disable this and see HtmlToTagTest.test_full_cicle fail horribly.

    # Default from LXML:
    # attribute_defaults=False,
    # dtd_validation=False,
    # load_dtd=False,
    # no_network=True,
    # ns_clean=False,