コード例 #1
0
def test_sitemap_index():
    """Check the frame built from ``sitemap_index_url``.

    The result must be a DataFrame with an ``errors`` column that holds
    both expected messages, and must also expose the ``loc``,
    ``download_date`` and ``sitemap`` columns.
    """
    sitemap_df = sitemap_to_df(sitemap_index_url)
    assert isinstance(sitemap_df, pd.DataFrame)
    assert 'errors' in sitemap_df
    expected_messages = {
        'WARNING: Sitemap contains a link to itself',
        'HTTP Error 404: Not Found',
    }
    # issubset() iterates the 'errors' column looking for each message.
    assert expected_messages.issubset(sitemap_df['errors'])
    required_columns = ('loc', 'download_date', 'sitemap')
    assert all(name in sitemap_df for name in required_columns)
コード例 #2
0
def test_get_sitemaps_from_robotstxt():
    """Check that ``sitemap_to_df(robotstxt_url)`` returns a DataFrame."""
    sitemap_df = sitemap_to_df(robotstxt_url)
    is_frame = isinstance(sitemap_df, pd.DataFrame)
    assert is_frame
コード例 #3
0
def test_news_sitemap():
    """Check the frame built from ``news_sitemap_url``.

    The result must be a DataFrame that contains a ``news`` column.
    """
    news_df = sitemap_to_df(news_sitemap_url)
    assert isinstance(news_df, pd.DataFrame)
    expected_column = 'news'
    assert expected_column in news_df
コード例 #4
0
def test_video_sitemap():
    """Check the frame built from ``video_sitemap_url``.

    The result must be a DataFrame that contains a ``video_content_loc``
    column.
    """
    video_df = sitemap_to_df(video_sitemap_url)
    assert isinstance(video_df, pd.DataFrame)
    expected_column = 'video_content_loc'
    assert expected_column in video_df
コード例 #5
0
def test_image_sitemap():
    """Check the frame built from ``image_sitemap_url``.

    The result must be a DataFrame that contains an ``image`` column.
    """
    image_df = sitemap_to_df(image_sitemap_url)
    assert isinstance(image_df, pd.DataFrame)
    expected_column = 'image'
    assert expected_column in image_df
コード例 #6
0
def test_error_sitemap():
    """Expect ``sitemap_to_df(error_sitemap_url)`` to raise an exception."""
    # Any Exception subclass satisfies this check; no specific type is pinned.
    expect_failure = pytest.raises(Exception)
    with expect_failure:
        sitemap_to_df(error_sitemap_url)
コード例 #7
0
def test_gz_sitemap():
    """Check the frame built from ``zipped_sitemap_url``.

    The result must be a DataFrame with exactly five rows.
    """
    zipped_df = sitemap_to_df(zipped_sitemap_url)
    assert isinstance(zipped_df, pd.DataFrame)
    row_count = len(zipped_df)
    assert row_count == 5
コード例 #8
0
def test_regular_sitemap():
    """Check the frame built from ``regular_sitemap_url``.

    The result must be a DataFrame with exactly five rows.
    """
    regular_df = sitemap_to_df(regular_sitemap_url)
    assert isinstance(regular_df, pd.DataFrame)
    row_count = len(regular_df)
    assert row_count == 5
コード例 #9
0
def test_sitemap_index():
    """Check the frame built from ``sitemap_index_url``.

    The result must be a DataFrame with exactly six rows.
    """
    index_df = sitemap_to_df(sitemap_index_url)
    assert isinstance(index_df, pd.DataFrame)
    row_count = len(index_df)
    assert row_count == 6
コード例 #10
0
def test_get_sitemaps_from_robotstxt():
    """Check that ``sitemap_to_df(robotstxt_url)`` returns a DataFrame."""
    result = sitemap_to_df(robotstxt_url)
    # With no assertion the test would pass for any return value; verify at
    # least the type of the result.
    assert isinstance(result, pd.DataFrame)
コード例 #11
0
def test_sitemap_index():
    """Check the frame built from ``sitemap_index_url``.

    The result must be a DataFrame exposing the ``loc``, ``download_date``
    and ``sitemap`` columns.
    """
    index_df = sitemap_to_df(sitemap_index_url)
    assert isinstance(index_df, pd.DataFrame)
    required_columns = ('loc', 'download_date', 'sitemap')
    assert all(name in index_df for name in required_columns)