Esempio n. 1
0
def test_remove_insignificant_text_nodes():
    """Check the serialized result of stripping text nodes from a small page.

    The document is parsed, passed through
    ``remove_insignificant_text_nodes`` and serialized; the result must
    equal the compact expected markup.  The serialized output is then fed
    through the same pipeline again and must come out unchanged.
    """
    def _strip_and_serialize(markup):
        # One full pass: parse -> strip in place -> serialize.
        tree = parse_minidom(markup)
        remove_insignificant_text_nodes(tree)
        return minidom_tostring(tree)

    source = dedent('''
        <html>
            <head />
            <body>
                <p>
                    one <em>two</em> <strong>three</strong>
                </p>
                <table>
                    <tr>
                        <td>stuff</td>
                    </tr>
                </table>
            </body>
        </html>
    ''')
    expected = (
        '<p> one <em>two</em> <strong>three</strong> </p> '
        '<table><tr><td>stuff</td></tr></table>'
    )

    first_pass = _strip_and_serialize(source)
    assert_equal(first_pass, expected)

    # A second pass over already-processed output must be a no-op.
    second_pass = _strip_and_serialize(first_pass)
    assert_equal(second_pass, expected)
Esempio n. 2
0
def test_remove_insignificant_text_nodes():
    """Run the strip pipeline twice and compare against the expected markup.

    The first pass starts from the indented source document; the second
    pass starts from the output of the first, which verifies that
    re-processing already-processed markup changes nothing.
    """
    expected = (
        "<p> one <em>two</em> <strong>three</strong> </p> "
        "<table><tr><td>stuff</td></tr></table>"
    )
    markup = dedent("""
        <html>
            <head />
            <body>
                <p>
                    one <em>two</em> <strong>three</strong>
                </p>
                <table>
                    <tr>
                        <td>stuff</td>
                    </tr>
                </table>
            </body>
        </html>
    """)

    # Pass 1 processes the raw document; pass 2 is the idempotence check.
    for _pass in range(2):
        tree = parse_minidom(markup)
        remove_insignificant_text_nodes(tree)
        markup = minidom_tostring(tree)
        assert_equal(markup, expected)
Esempio n. 3
0
def test_remove_insignificant_text_nodes_nbsp():
    """Check table cells holding a space, ``&#160;`` and ``&nbsp;``.

    After ``remove_insignificant_text_nodes`` and serialization, the
    markup between the table elements must be gone while each ``<td>``
    still carries its single-character content.
    """
    source = dedent("""
        <table>
        <tbody>
        <tr>
            <td> </td>
            <td>&#160;</td>
            <td>&nbsp;</td>
        </tr>
        </tbody>
        </table>
    """)
    expected = (
        "<table><tbody><tr><td> </td><td> </td><td> </td>"
        "</tr></tbody></table>"
    )

    tree = parse_minidom(source)
    remove_insignificant_text_nodes(tree)
    serialized = minidom_tostring(tree)

    assert_equal(serialized, expected)
Esempio n. 4
0
def test_remove_insignificant_text_nodes_nbsp():
    """Verify the stripped output for cells with a space or an nbsp entity.

    The input table has three cells (a plain space, ``&#160;`` and
    ``&nbsp;``).  The serialized result must equal the expected one-line
    table in which every cell still has content.
    """
    expected = (
        '<table><tbody><tr><td> </td><td> </td><td> </td>'
        '</tr></tbody></table>'
    )

    table_markup = dedent('''
        <table>
        <tbody>
        <tr>
            <td> </td>
            <td>&#160;</td>
            <td>&nbsp;</td>
        </tr>
        </tbody>
        </table>
    ''')
    document = parse_minidom(table_markup)
    remove_insignificant_text_nodes(document)  # mutates the DOM in place
    assert_equal(minidom_tostring(document), expected)