Esempi in Python per HTMLParser

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: lxml.html.html5parser

Classe/tipologia: HTMLParser

Esempi su hotexamples.com: 4

HTMLParser in Python: 4 esempi trovati. Questi sono i migliori esempi reali in Python di lxml.html.html5parser.HTMLParser, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra / Nascondi

HTMLParser (4)

Metodi utilizzati di frequente

HTMLParser (4)

Esempio n. 1

Mostra file

def is_valid_html(html):
    """Report whether *html* parses without error under a strict parser.

    The text is encoded as UTF-8 and handed to ``fragments_fromstring``
    together with an ``HTMLParser(strict=True)``.

    Returns:
        ``False`` if parsing raises ``ParseError``, ``True`` otherwise.
    """
    encoded = html.encode('utf-8')
    strict_parser = HTMLParser(strict=True)
    try:
        fragments_fromstring(encoded, parser=strict_parser)
    except ParseError:
        outcome = False
    else:
        outcome = True
    return outcome

Esempio n. 2

Mostra file

    def _validate_htmloutput(self, htmloutput, field, value):
        """Check that ``value`` is a valid HTML template.

        The value is encoded as UTF-8 and parsed with a strict
        ``HTMLParser``; a ``ParseError`` is reported through
        ``self._error``.  When ``htmloutput`` is a dict with a truthy
        ``'template_vars_required'`` entry, the template must also contain
        at least one ``$name`` placeholder.

        NOTE(review): the ``_validate_<rule>`` name and ``self._error`` look
        like a validator-rule hook of a schema library -- confirm against
        the enclosing class.

        Args:
            htmloutput: Constraint for this rule; only inspected further
                when it is a dict.
            field: Name of the field being validated, passed to
                ``self._error``.
            value: The HTML template text to validate.

        Returns:
            The result of ``self._error(...)`` when a check fails,
            otherwise ``None``.
        """
        try:
            fragments_fromstring(value.encode('utf-8'),
                                 parser=HTMLParser(strict=True))
        except ParseError as e:
            return self._error(
                field, 'The provided HTML template is not valid: {}'.format(e))

        if isinstance(htmloutput, dict):
            if htmloutput.get('template_vars_required'):
                # Raw string: '\$' and '\w' are not valid string escapes, so
                # the non-raw form triggers invalid-escape warnings.
                template_vars = re.findall(r'\$(\w+)', value)
                if not template_vars:
                    return self._error(
                        field,
                        "The provided HTML template is not valid: no vars available."
                    )

Esempio n. 3

Mostra file

 def make_one(self, **kwargs):
     """Create an ``lxml.html.html5parser.HTMLParser`` from ``kwargs``.

     The class is imported inside the call, so the import only happens
     when a parser is actually requested.
     """
     from lxml.html.html5parser import HTMLParser as parser_factory
     parser = parser_factory(**kwargs)
     return parser

Esempio n. 4

Mostra file

# Constants
# Matches ASCII-only identifiers: a letter or underscore followed by any
# number of letters, digits or underscores.  The pattern is anchored with \Z
# rather than $ because $ also matches just before a trailing newline, which
# would accept 'name\n' as an identifier.
VALID_NON_UNICODE_IDENTIFIER_RE = re.compile(r'^[_a-zA-Z][_a-zA-Z0-9]*\Z')
# Reserved words, taken from:
# https://docs.python.org/3.3/reference/lexical_analysis.html#keywords
PY_KEYWORDS = {
    # Constant-like keywords
    'False', 'None', 'True',
    # Boolean, membership and identity operators
    'and', 'in', 'is', 'not', 'or',
    # Conditionals and loops
    'break', 'continue', 'elif', 'else', 'for', 'if', 'while',
    # Exceptions, assertions and context managers
    'as', 'assert', 'except', 'finally', 'raise', 'try', 'with',
    # Definitions and scope declarations
    'class', 'def', 'global', 'lambda', 'nonlocal',
    # Imports
    'from', 'import',
    # Remaining statements
    'del', 'pass', 'return', 'yield',
}

# Module-level HTML5 parser instance.  No ``tree=TreeBuilder`` argument is
# passed because lxml.html.html5parser.HTMLParser.__init__ already does that.
_html5Parser = HTMLParser(
    debug=False,                  # same as the default
    namespaceHTMLElements=False,  # differs from the default
    strict=False,                 # same as the default
)

_xmlParser = XMLParser(
    encoding='utf-8',
    remove_blank_text=True,
    huge_tree=True,
    recover=True,   # ATTENTION: recover=True should *never* be needed at this point, but html5lib is broken in it's namespace-support (reading namespaced stuff correctly).  Disable this and see HtmlToTagTest.test_full_cicle fail horribly.

    # Default from LXML:
    # attribute_defaults=False,
    # dtd_validation=False,
    # load_dtd=False,
    # no_network=True,
    # ns_clean=False,