Ejemplos de parseHTML en Python

Lenguaje de programación: Python

Namespace/Package Name: softfab.xmlgen

Método / Función: parseHTML

Ejemplos en hotexamples.com: 11

Python parseHTML - 11 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de softfab.xmlgen.parseHTML extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Ejemplo n.º 1

Mostrar archivo

def testRaisePI():
    """An exception raised inside the PI handler must reach the caller."""
    def failingHandler(name, arg):
        raise KeyError(f'unknown PI: {name}')

    source = '<p>A processing <?jump> instruction.</p>'
    with raises(KeyError):
        parseHTML(source, piHandler=failingHandler)

Ejemplo n.º 2

Mostrar archivo

def testIgnoreXMLSyntax():
    """A PI closed XML-style ('?>') is handled without the question mark."""
    def shout(name, arg):
        assert name == 'jump'
        return arg.upper()

    source = '<p>A processing <?jump lazy fox?> instruction.</p>'
    expected = (
        '<p xmlns="http://www.w3.org/1999/xhtml">'
        'A processing LAZY FOX instruction.'
        '</p>'
        )

    result = parseHTML(source, piHandler=shout)
    assert result.flattenXML() == expected

Ejemplo n.º 3

Mostrar archivo

def testNoArgPI():
    """A PI without arguments reaches the handler with an empty string."""
    def lineBreak(name, arg):
        assert name == 'jump'
        assert arg == ''
        return xhtml.br

    source = '<p>A processing <?jump> instruction.</p>'
    expected = (
        '<p xmlns="http://www.w3.org/1999/xhtml">'
        'A processing <br/> instruction.'
        '</p>'
        )

    result = parseHTML(source, piHandler=lineBreak)
    assert result.flattenXML() == expected

Ejemplo n.º 4

Mostrar archivo

def testIgnoreXMLDecl():
    """Check that an XML declaration is skipped rather than treated as a PI.

    The handler must never be invoked for the leading '<?xml ...?>'
    declaration, and the declaration must not appear in the output.
    """
    def handler(name, arg):
        # Raise explicitly instead of 'assert False': assert statements are
        # removed under 'python -O', which would turn this guard into a
        # silent 'return None'.
        raise AssertionError(
            f'PI handler unexpectedly called: name={name!r}, arg={arg!r}')

    parsed = parseHTML(
        '<?xml version="1.0" encoding="UTF-8" ?>'
        '<html><body><p>XHTML document.</p></body></html>',
        piHandler=handler)
    assert parsed.flattenXML() == (
        '<html xmlns="http://www.w3.org/1999/xhtml">'
        '<body><p>XHTML document.</p></body>'
        '</html>')

Ejemplo n.º 5

Mostrar archivo

def testArgPI():
    """A PI argument is handed to the handler and its result is inserted."""
    def wrapInSpan(name, arg):
        assert name == 'jump'
        return xhtml.span[arg]

    source = '<p>A processing <?jump a little higher> instruction.</p>'
    expected = (
        '<p xmlns="http://www.w3.org/1999/xhtml">'
        'A processing <span>a little higher</span> instruction.'
        '</p>'
        )

    result = parseHTML(source, piHandler=wrapInSpan)
    assert result.flattenXML() == expected

Ejemplo n.º 6

Mostrar archivo

    def postProcess(self) -> Optional[XML]:
        """Post-process the previously rendered content into an XML tree.

        Returns the cached tree when one is available; otherwise parses the
        rendered string, caches the result and returns it. Returns None when
        there is no module, no rendered string, or when parsing fails (in
        which case the error is logged and DocErrors.RENDERING is flagged).
        """

        # Rebuild the table of contents; if it changed, the cached tree is
        # stale. ExtractedInfo has no __eq__ of its own, but because those
        # objects are cached an identity comparison is sufficient.
        currentToc = tuple(
            (childName + '/', childPage.extracted)
            for childName, childPage in self.childPages
            )
        if currentToc != self.__toc:
            self.__toc = currentToc
            self.__renderedXML = None

        # Serve from the cache when possible.
        cached = self.__renderedXML
        if cached is not None:
            return cached

        # Without a module or rendered text there is nothing to process.
        if self.module is None:
            return None
        source = self.__renderedStr
        if source is None:
            return None

        # Python-Markdown works on an ElementTree internally, but inline
        # HTML is only re-inserted after serialization, so the complete
        # output is never available as a tree. The serialized string has
        # to be parsed again instead.
        try:
            tree = parseHTML(source, piHandler=self.piHandler)
        except Exception:
            logging.exception('Error post-processing content for %s',
                              self.resource.packageName)
            self.errors |= DocErrors.RENDERING
            return None

        self.__renderedXML = tree
        return tree

Ejemplo n.º 7

Mostrar archivo

def testMultiTopLevel():
    """Several sibling top-level elements are all kept in the result."""
    source = '<h1>Hello!</h1><h1>Goodbye!</h1>'
    expected = (
        '<h1 xmlns="http://www.w3.org/1999/xhtml">Hello!</h1>'
        '<h1 xmlns="http://www.w3.org/1999/xhtml">Goodbye!</h1>'
        )

    result = parseHTML(source)
    assert result.flattenXML() == expected

Ejemplo n.º 8

Mostrar archivo

def testBasic():
    """A single element round-trips with the XHTML namespace added."""
    expected = '<h1 xmlns="http://www.w3.org/1999/xhtml">Hello!</h1>'

    result = parseHTML('<h1>Hello!</h1>')
    assert result.flattenXML() == expected

Ejemplo n.º 9

Mostrar archivo

def testIgnorePI():
    """Without a handler, a processing instruction is dropped from output."""
    source = '<p>A processing <?jump> instruction.</p>'
    expected = (
        '<p xmlns="http://www.w3.org/1999/xhtml">'
        'A processing  instruction.'
        '</p>'
        )

    result = parseHTML(source)
    assert result.flattenXML() == expected

Ejemplo n.º 10

Mostrar archivo

def testVoid():
    """A void element such as <br/> is preserved inside its parent."""
    source = '<p>Text with<br/>a void element.</p>'
    expected = (
        '<p xmlns="http://www.w3.org/1999/xhtml">'
        'Text with<br/>a void element.'
        '</p>'
        )

    result = parseHTML(source)
    assert result.flattenXML() == expected

Ejemplo n.º 11

Mostrar archivo

def testNested():
    """An element nested inside another keeps its position and content."""
    source = '<p>Text with <i>nested</i> tags.</p>'
    expected = (
        '<p xmlns="http://www.w3.org/1999/xhtml">'
        'Text with <i>nested</i> tags.'
        '</p>'
        )

    result = parseHTML(source)
    assert result.flattenXML() == expected