Beispiel #1
0
def process(word):
    """Fetch the page for ``word`` and print its text reduced to plain ASCII.

    The URL is built by substituting ``word`` into the module-level
    ``_template``.  The downloaded HTML is converted to text, non-ASCII
    characters are removed, bracketed ``[....gif]`` spans (presumably image
    placeholders left by the HTML-to-text conversion) are stripped, and the
    result is handed to ``print_result``.

    :param word: value substituted for ``{word}`` in ``_template``.
    """
    url = _template.format(word=word)
    html = web.get_page(url, user_agent=True)
    txt = web.html_to_text(html).decode("utf-8")

    # Turn the middle dot (U+00B7) into a hyphen first: it is not ASCII, so
    # it would otherwise be lost in the remove_non_ascii() step below.
    txt = txt.replace(u"\xb7", "-")
    txt = ascii.remove_non_ascii(txt).encode("ascii")
    # Raw string with an escaped dot: only spans that really end in ".gif"
    # are removed (an unescaped dot would match any character).
    txt = re.sub(r"\[.*?\.gif\]", "", txt)

    print_result(txt)
Beispiel #2
0
def process(word):
    """Look up ``word`` online and print the page text as plain ASCII.

    ``word`` is inserted into the module-level ``_template`` to form the
    URL.  The fetched HTML is turned into text, cleaned of non-ASCII
    characters and of bracketed ``[....gif]`` spans (presumably image
    placeholders produced by the HTML-to-text step), and then passed to
    ``print_result``.

    :param word: value substituted for ``{word}`` in ``_template``.
    """
    url = _template.format(word=word)
    html = web.get_page(url, user_agent=True)
    txt = web.html_to_text(html).decode('utf-8')

    # Replace the middle dot (U+00B7) with a hyphen before the non-ASCII
    # characters are removed, so that it is kept in a readable form.
    txt = txt.replace(u'\xb7', '-')
    txt = ascii.remove_non_ascii(txt).encode('ascii')
    # Raw string and escaped dot: match a literal ".gif" suffix only, not
    # any character followed by "gif".
    txt = re.sub(r'\[.*?\.gif\]', '', txt)

    print_result(txt)
def test_remove_non_ascii():
    """Accented letters are stripped; plain ASCII characters are kept."""
    cases = (
        ('László', 'Lszl'),
        ('ünnep', 'nnep'),
        ('áéíóöőúüű-ok', '-ok'),
        ('ÁÉÍÓÖŐÚÜŰ-ok', '-ok'),
    )
    for raw, expected in cases:
        assert ascii.remove_non_ascii(raw) == expected
Beispiel #4
0
def test_remove_non_ascii():
    """remove_non_ascii() must drop accented letters and keep ASCII ones."""
    samples = [
        ('László', 'Lszl'),
        ('ünnep', 'nnep'),
        ('áéíóöőúüű-ok', '-ok'),
        ('ÁÉÍÓÖŐÚÜŰ-ok', '-ok'),
    ]
    for text, stripped in samples:
        assert ascii.remove_non_ascii(text) == stripped