def nyt():
    """Yield an ``NYTimes`` site built from the archived 2020-03-10 page.

    Generator with a single ``yield`` (presumably registered as a pytest
    fixture; the decorator is not visible here).  Once the consumer resumes
    it, the file at ``web.file.path`` is deleted, but only when ``CLEAN_UP``
    is truthy and the file actually exists.
    """
    bucket = "https://bites-data.s3.us-east-2.amazonaws.com/"
    source = Web(bucket + "2020-03-10_nytimes.html", File("nytimes.html"))

    yield NYTimes(source)

    # Teardown: only touch the filesystem when cleanup is enabled.
    if CLEAN_UP and source.file.path.exists():
        source.file.path.unlink()
def rcp():
    """Yield a ``RealClearPolitics`` site built from the archived page.

    Generator with a single ``yield`` (presumably registered as a pytest
    fixture; the decorator is not visible here).  Once the consumer resumes
    it, the file at ``web.file.path`` is deleted, but only when ``CLEAN_UP``
    is truthy and the file actually exists.
    """
    bucket = "https://bites-data.s3.us-east-2.amazonaws.com/"
    source = Web(
        bucket + "2020-03-10_realclearpolitics.html",
        File("realclearpolitics.html"),
    )

    yield RealClearPolitics(source)

    # Teardown: only touch the filesystem when cleanup is enabled.
    if CLEAN_UP and source.file.path.exists():
        source.file.path.unlink()
def test_web_bad_url():
    """Reading ``Web.data`` for an unresolvable host raises ``URLError``.

    The error text must mention ``urlopen error`` together with one of the
    two resolver messages accepted below.
    """
    target = File("clamytoe.html")
    web = Web("https://clamytoe.dev", target)

    with pytest.raises(URLError) as excinfo:
        # Attribute access alone is what is expected to raise.
        web.data

    message = str(excinfo.value)
    resolver_failures = (
        'Name or service not known',
        'nodename nor servname provided, or not known',
    )

    assert 'urlopen error' in message
    assert any(text in message for text in resolver_failures)
def test_site(test_file):
    """Exercise the ``Site`` base class through a bare concrete subclass.

    ``Dummy`` overrides nothing, so every call below runs the base-class
    implementation; ``parse_rows``, ``polls`` and ``stats`` are each
    expected to return ``None``.
    """
    # Emptying the abstract-method set allows a subclass without overrides
    # to be instantiated.
    # NOTE: this mutates ``Site`` itself and is not undone in this test.
    Site.__abstractmethods__ = set()

    @dataclass
    class Dummy(Site):
        web: Web

    site = Dummy(Web("https://projects.fivethirtyeight.com/polls/", test_file))

    table = site.find_table()
    rows = site.parse_rows(table)
    polls = site.polls()
    stats = site.stats()

    assert site.web.file.name == "test.html"
    assert isinstance(Site, ABCMeta)
    for outcome in (rows, polls, stats):
        assert outcome is None
def test_web(test_file):
    """Check the attributes a ``Web`` exposes after construction.

    ``url`` must echo the constructor argument, ``file`` must be a ``File``
    and ``soup`` must be a ``Soup``.
    """
    address = "https://projects.fivethirtyeight.com/polls/"
    web = Web(address, test_file)

    assert web.url == address
    assert isinstance(web.file, File)
    assert isinstance(web.soup, Soup)