Python normalise_text Examples

Programming Language: Python

Namespace/Package Name: readabilipy.simplifiers

Method/Function: normalise_text

Examples at hotexamples.com: 6

Python normalise_text - 6 examples found. These are the top rated real world Python examples of readabilipy.simplifiers.normalise_text extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def test_strip_control_characters_non_printing_characters():
    """Check that U+200B and U+FEFF are dropped by both helpers.

    The input carries a U+200B in the middle of a word and a trailing
    U+FEFF; both functions are expected to return the same cleaned text.
    """
    raw = "A string with non-printing characters inc\u200Bluded\ufeff"
    cleaned = strip_control_characters(raw)
    assert cleaned == "A string with non-printing characters included"
    normalised = normalise_text(raw)
    assert normalised == "A string with non-printing characters included"

Example #2

Show file

File: test_simple_json.py Project: alan-turing-institute/ReadabiliPy

def test_node_index_assignment():
    """Whitelisted elements should get an appropriate index but bare strings should not.

    Each child of the first <div> is passed through add_node_indexes,
    stringified and normalised; children that normalise to an empty string
    are ignored before comparing against the expected output.
    """
    markup = """
        <div>
            <p>Some text</p>
            <p></p>
            Some bare text
        </div>
    """.strip()
    parsed = BeautifulSoup(markup, 'html.parser')
    first_div = parsed.find_all("div")[0]

    rendered = []
    for child in first_div.children:
        rendered.append(normalise_text(str(add_node_indexes(child))))

    # Discard entries that normalised down to nothing.
    non_empty = []
    for text in rendered:
        if text:
            non_empty.append(text)

    expected = [
        '<p data-node-index="0">Some text</p>',
        '<p data-node-index="0"></p>',
        'Some bare text',
    ]
    assert non_empty == expected

Example #3

Show file

def test_strip_control_characters_tab():
    """Check tab handling: kept by stripping, turned into a space by normalising."""
    raw = "A string with tabs\tinc\u200Bluded\ufeff"

    # strip_control_characters is expected to leave the tab in place.
    stripped = strip_control_characters(raw)
    assert stripped == "A string with tabs\tincluded"

    # normalise_text is expected to replace the tab with a single space.
    normalised = normalise_text(raw)
    assert normalised == "A string with tabs included"

Example #4

Show file

def test_strip_control_characters_ff():
    """Check form-feed handling: kept by stripping, turned into a space by normalising."""
    raw = "A string with form feed\finc\u200Bluded\ufeff"

    # strip_control_characters is expected to leave the form feed in place.
    stripped = strip_control_characters(raw)
    assert stripped == "A string with form feed\fincluded"

    # normalise_text is expected to replace the form feed with a single space.
    normalised = normalise_text(raw)
    assert normalised == "A string with form feed included"

Example #5

Show file

def test_strip_control_characters_cr_lf():
    """Check CR/LF handling: kept by stripping, collapsed to a space by normalising."""
    raw = "A string with new lines\r\ninc\u200Bluded\ufeff"

    # strip_control_characters is expected to leave the "\r\n" pair in place.
    stripped = strip_control_characters(raw)
    assert stripped == "A string with new lines\r\nincluded"

    # normalise_text is expected to turn the "\r\n" pair into one space.
    normalised = normalise_text(raw)
    assert normalised == "A string with new lines included"

Example #6

Show file

def test_text_normalisation():
    """Check normalisation of a combining accent and a run of spaces.

    The input spells the accent with a combining U+0301 and separates the
    two words with three spaces; the expected result has a single space.
    """
    raw = "Ame\u0301lie   Poulain"
    result = normalise_text(raw)
    assert result == "Amélie Poulain"