def test_to_json():
    """Check every field of the dict produced by ``HtmlParser.to_json``.

    Verifies the title and body text, that header/footer text is absent
    from the body, and that the ``links``, ``download_links`` and
    ``organisations`` entries equal the expected lists exactly.
    """
    expected_links = [
        '/defra',
        'www.links1.com',
        'www.links2.com',
        'www.links3.com',
        'http://www.gov.uk/stats.pdf',
    ]
    expected_downloads = ['http://www.gov.uk/stats.pdf']
    expected_organisations = ['DEFRA']

    document = HtmlParser(page).to_json()

    assert "A fantastic title!" in document['title']

    body = document['body']
    assert "A fantastic body!" in body
    # Header and footer text must not appear in the body entry.
    for unwanted in ("The header", "The footer"):
        assert unwanted not in body

    assert document["links"] == expected_links
    assert document["download_links"] == expected_downloads
    assert document["organisations"] == expected_organisations
def test_clean_body():
    """``clean_body`` keeps the body text and omits header/footer text."""
    body = HtmlParser(page).clean_body()

    assert "A fantastic body!" in body
    # Header and footer text must not appear in the cleaned body.
    for unwanted in ("The header", "The footer"):
        assert unwanted not in body
def test_links():
    """``links`` returns exactly the expected list, in order, for ``page``."""
    expected = [
        '/defra',
        'www.links1.com',
        'www.links2.com',
        'www.links3.com',
        'http://www.gov.uk/stats.pdf',
    ]

    assert HtmlParser(page).links() == expected
def test_organisations():
    """``organisations`` returns exactly ``['DEFRA']`` for ``page``."""
    expected = ['DEFRA']
    found = HtmlParser(page).organisations()

    assert found == expected
def test_download_links():
    """``download_links`` returns only the PDF link for ``page``."""
    expected = ['http://www.gov.uk/stats.pdf']

    assert HtmlParser(page).download_links() == expected
def test_title():
    """``title`` returns a value containing the expected title text."""
    title = HtmlParser(page).title()

    assert "A fantastic title!" in title