Esempio n. 1
0
def test_to_json():
    """Verify the document produced by ``HtmlParser.to_json``.

    Checks the title and body by substring, makes sure the header and
    footer text are absent from the body, and compares the ``links``,
    ``download_links`` and ``organisations`` entries against exact lists.
    ``page`` is defined outside this test.
    """
    document = HtmlParser(page).to_json()

    assert "A fantastic title!" in document['title']

    body_text = document['body']
    assert "A fantastic body!" in body_text
    # Neither of these fragments may appear in the body.
    for excluded in ("The header", "The footer"):
        assert excluded not in body_text

    expected_links = [
        '/defra',
        'www.links1.com',
        'www.links2.com',
        'www.links3.com',
        'http://www.gov.uk/stats.pdf',
    ]
    assert document["links"] == expected_links

    expected_downloads = ['http://www.gov.uk/stats.pdf']
    assert document["download_links"] == expected_downloads

    expected_organisations = ['DEFRA']
    assert document["organisations"] == expected_organisations
Esempio n. 2
0
def test_clean_body():
    """Verify ``HtmlParser.clean_body`` keeps the body text only.

    The result must contain the body sentence and must not contain the
    header or footer text. ``page`` is defined outside this test.
    """
    body_text = HtmlParser(page).clean_body()

    assert "A fantastic body!" in body_text
    # Neither of these fragments may appear in the result.
    for excluded in ("The header", "The footer"):
        assert excluded not in body_text
Esempio n. 3
0
def test_links():
    """Verify ``HtmlParser.links`` returns the expected links in order.

    ``page`` is defined outside this test.
    """
    parser = HtmlParser(page)
    found_links = parser.links()

    expected_links = [
        '/defra',
        'www.links1.com',
        'www.links2.com',
        'www.links3.com',
        'http://www.gov.uk/stats.pdf',
    ]
    assert found_links == expected_links
Esempio n. 4
0
def test_organisations():
    """Verify ``HtmlParser.organisations`` returns exactly ``['DEFRA']``.

    ``page`` is defined outside this test.
    """
    parser = HtmlParser(page)
    found_organisations = parser.organisations()

    assert found_organisations == ['DEFRA']
Esempio n. 5
0
def test_download_links():
    """Verify ``HtmlParser.download_links`` returns only the PDF link.

    ``page`` is defined outside this test.
    """
    parser = HtmlParser(page)
    found_downloads = parser.download_links()

    assert found_downloads == ['http://www.gov.uk/stats.pdf']
Esempio n. 6
0
def test_title():
    """Verify ``HtmlParser.title`` contains the expected title text.

    The check is a substring match, so surrounding text is tolerated.
    ``page`` is defined outside this test.
    """
    title_text = HtmlParser(page).title()
    assert "A fantastic title!" in title_text