def test_robotstxt_to_df_saves_single_file():
    """Check that ``robotstxt_to_df`` writes its result to ``output_file``.

    The function is called with ``output_file='robots_output.jl'``; the file
    is then read back as JSON lines and must load as a DataFrame containing
    the ``directive``, ``content`` and ``download_date`` columns.

    The output file is created in the current working directory and is
    removed afterwards, whether or not the assertions pass.
    """
    output_path = 'robots_output.jl'
    try:
        robotstxt_to_df('https://www.media-supermarket.com/robots.txt',
                        output_file=output_path)
        result = pd.read_json(output_path, lines=True)
        assert isinstance(result, pd.DataFrame)
        assert all(col in result
                   for col in ['directive', 'content', 'download_date'])
    finally:
        # Clean up even when the call or an assertion fails, so a failing
        # run does not leave a stale file behind. The existence check keeps
        # a missing file from masking the original error.
        if os.path.exists(output_path):
            os.remove(output_path)
def test_robotstxt_to_df():
    """Check the type and core columns of the ``robotstxt_to_df`` result.

    The call is made without ``output_file``; the returned value must be a
    pandas DataFrame that contains the ``directive``, ``content`` and
    ``download_date`` columns.
    """
    expected_columns = ('directive', 'content', 'download_date')
    robots_df = robotstxt_to_df('https://www.media-supermarket.com/robots.txt')
    assert isinstance(robots_df, pd.DataFrame)
    missing = [name for name in expected_columns if name not in robots_df]
    assert not missing
def test_robotstxt_to_df_contains_errors():
    """Check that an unusable URL is reported through an ``errors`` entry.

    ``robotstxt_to_df`` is called with ``'wrong_url.html'`` and the returned
    object must contain ``'errors'`` (presumably a DataFrame column, as in the
    other tests of this function -- confirm against the implementation).
    """
    bad_url = 'wrong_url.html'
    outcome = robotstxt_to_df(bad_url)
    assert 'errors' in outcome
def test_robotstxt_to_df_raises_on_wrong_file():
    """Check that an ``output_file`` named ``wrong_extension.pdf`` is rejected.

    ``robotstxt_to_df`` must raise ``ValueError`` when asked to write to that
    path. ``robots_file`` is a module-level input defined elsewhere in this
    test module.
    """
    bad_output_path = 'wrong_extension.pdf'
    with pytest.raises(ValueError):
        robotstxt_to_df(robots_file, output_file=bad_output_path)