Example #1
0
def test_sitemap_index():
    """Check the frame built from ``sitemap_index_url``.

    The result must be a DataFrame with an ``errors`` column that contains
    both expected messages, plus the ``loc``, ``download_date`` and
    ``sitemap`` columns.
    """
    frame = sitemap_to_df(sitemap_index_url)
    assert isinstance(frame, pd.DataFrame)
    assert 'errors' in frame.columns
    expected_errors = {
        'HTTP Error 404: Not Found',
        'WARNING: Sitemap contains a link to itself',
    }
    # issubset iterates the column's values, so each message must occur
    # at least once somewhere in the 'errors' column.
    assert expected_errors.issubset(frame['errors'])
    required_columns = ('loc', 'download_date', 'sitemap')
    assert all(name in frame.columns for name in required_columns)
Example #2
0
def test_get_sitemaps_from_robotstxt():
    """``sitemap_to_df`` called with ``robotstxt_url`` must yield a DataFrame."""
    frame = sitemap_to_df(robotstxt_url)
    assert isinstance(frame, pd.DataFrame)
Example #3
0
def test_news_sitemap():
    """The frame for ``news_sitemap_url`` must expose a ``news`` column."""
    frame = sitemap_to_df(news_sitemap_url)
    assert isinstance(frame, pd.DataFrame)
    assert 'news' in frame.columns
Example #4
0
def test_video_sitemap():
    """The frame for ``video_sitemap_url`` must expose ``video_content_loc``."""
    frame = sitemap_to_df(video_sitemap_url)
    assert isinstance(frame, pd.DataFrame)
    assert 'video_content_loc' in frame.columns
Example #5
0
def test_image_sitemap():
    """The frame for ``image_sitemap_url`` must expose an ``image`` column."""
    frame = sitemap_to_df(image_sitemap_url)
    assert isinstance(frame, pd.DataFrame)
    assert 'image' in frame.columns
Example #6
0
def test_error_sitemap():
    """Expect ``sitemap_to_df`` to raise when given ``error_sitemap_url``."""
    # Any Exception subclass satisfies this check; the specific type is
    # not pinned down here.
    with pytest.raises(Exception):
        sitemap_to_df(error_sitemap_url)
Example #7
0
def test_gz_sitemap():
    """The frame for ``zipped_sitemap_url`` must be a DataFrame of 5 rows."""
    frame = sitemap_to_df(zipped_sitemap_url)
    assert isinstance(frame, pd.DataFrame)
    row_count = frame.shape[0]
    assert row_count == 5
Example #8
0
def test_regular_sitemap():
    """The frame for ``regular_sitemap_url`` must be a DataFrame of 5 rows."""
    frame = sitemap_to_df(regular_sitemap_url)
    assert isinstance(frame, pd.DataFrame)
    row_count = frame.shape[0]
    assert row_count == 5
Example #9
0
def test_sitemap_index():
    """The frame for ``sitemap_index_url`` must be a DataFrame of 6 rows."""
    frame = sitemap_to_df(sitemap_index_url)
    assert isinstance(frame, pd.DataFrame)
    row_count = frame.shape[0]
    assert row_count == 6
Example #10
0
def test_get_sitemaps_from_robotstxt():
    """Check that ``sitemap_to_df`` accepts ``robotstxt_url``.

    The call must complete without raising and return a DataFrame.
    """
    result = sitemap_to_df(robotstxt_url)
    # Previously the result was bound but never inspected, so the test
    # could only fail on an exception; verify the return type as well.
    assert isinstance(result, pd.core.frame.DataFrame)
Example #11
0
def test_sitemap_index():
    """Check the basic columns of the frame built from ``sitemap_index_url``.

    The result must be a DataFrame carrying the ``loc``, ``download_date``
    and ``sitemap`` columns.
    """
    frame = sitemap_to_df(sitemap_index_url)
    assert isinstance(frame, pd.DataFrame)
    required_columns = ('loc', 'download_date', 'sitemap')
    assert all(name in frame.columns for name in required_columns)