Exemple #1
0
def test_known_urls():
    """Check that ``known_urls`` returns more than a handful of entries.

    The method is called twice on the same host: once with ``alive=True``
    and ``subdomain=True``, and once with its default arguments.
    """
    host = "akamhy.github.io"

    filtered = waybackpy.Url(host, user_agent).known_urls(
        alive=True, subdomain=True)
    assert len(filtered) > 2

    unfiltered = waybackpy.Url(host, user_agent).known_urls()
    assert len(unfiltered) > 3
Exemple #2
0
def test_near():
    """Check that ``near`` returns an archive close to the requested time.

    The year-only lookup always runs.  The month-level lookup, the
    hour-level lookup and the "never archived" failure case only run when
    ``sys.version_info > (3, 6)``.
    """
    safari_agent = (
        "Mozilla/5.0 (Windows; U; Windows NT 6.0; de-DE) AppleWebKit/533.20.25 "
        "(KHTML, like Gecko) Version/5.0.3 Safari/533.19.4"
    )
    google = waybackpy.Url("google.com", safari_agent)
    assert "2010" in str(google.near(year=2010))

    if not sys.version_info > (3, 6):
        return

    # Accept the requested month or either of its neighbours.
    month_result = str(google.near(year=2015, month=2))
    assert any(
        stamp in month_result for stamp in ("201502", "201501", "201503")
    )

    edge_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"
    )
    python_org = waybackpy.Url("www.python.org", edge_agent)
    hour_result = str(python_org.near(year=2008, month=5, day=9, hour=15))
    # Accept the requested hour or up to two hours before it.
    assert any(
        stamp in hour_result
        for stamp in ("2008050915", "2008050914", "2008050913")
    )

    # A made-up address is expected to make the lookup raise.
    with pytest.raises(Exception):
        never_archived_url = (
            "https://ee_3n.wrihkeipef4edia.org/rwti5r_ki/Nertr6w_rork_rse7c_urity"
        )
        waybackpy.Url(never_archived_url, user_agent).near(year=2010)
Exemple #3
0
def test_total_archives():
    """Check ``total_archives`` for a heavily archived and a made-up URL.

    The large-count assertion only runs when
    ``sys.version_info > (3, 6)``; the zero-count assertion always runs.
    Both URLs are passed with surrounding spaces.
    """
    if sys.version_info > (3, 6):
        popular = waybackpy.Url(" https://google.com ", user_agent)
        assert popular.total_archives() > 500000

    made_up = waybackpy.Url(
        " https://gaha.e4i3n.m5iai3kip6ied.cima/gahh2718gs/ahkst63t7gad8 ",
        user_agent,
    )
    assert made_up.total_archives() == 0
Exemple #4
0
def test_save():
    """Exercise ``Url.save`` on one valid site and on inputs expected to fail.

    A randomly chosen well-known site must be saved successfully, and the
    returned archive URL must contain the site's address.  When
    ``sys.version_info > (3, 6)``, two failure cases are also checked:
    constructing a ``Url`` from a malformed string, and saving ``url3``.
    """
    # NOTE(review): the sleeps look like throttling between requests to the
    # archive service -- confirm before shortening them.
    time.sleep(10)

    # Test for urls that exist and can be archived.
    url_list = [
        "en.wikipedia.org",
        "www.wikidata.org",
        "commons.wikimedia.org",
        "www.wiktionary.org",
        "www.w3schools.com",
        "www.ibm.com",
    ]
    url1 = random.choice(url_list)
    target = waybackpy.Url(
        url1,
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
    )
    archived_url1 = target.save()
    assert url1 in archived_url1

    if sys.version_info > (3, 6):
        # Test for urls that are incorrect.
        url2 = "ha ha ha ha"
        with pytest.raises(Exception):
            waybackpy.Url(url2, user_agent)
        time.sleep(5)

        url3 = "http://www.archive.is/faq.html"
        # TODO: the test for urls not allowed to be archived by robots.txt
        # (it saved url3 with a Firefox user agent and expected an
        # exception) doesn't work anymore. Find alternatives.

        # Non existent urls, test
        with pytest.raises(Exception):
            target = waybackpy.Url(
                url3,
                "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) "
                "AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 "
                "Safari/533.20.27",
            )
            target.save()
Exemple #5
0
def test_dunders():
    """Check the ``repr`` and ``str`` output of a ``waybackpy.Url``."""
    page = "https://en.wikipedia.org/wiki/Network_security"
    agent = "UA"
    wayback = waybackpy.Url(page, agent)

    expected_repr = "waybackpy.Url(url=%s, user_agent=%s)" % (page, agent)
    assert expected_repr == repr(wayback)
    assert "en.wikipedia.org" in str(wayback)
Exemple #6
0
def test_url_check():
    """Constructing a ``Url`` from a host with no dots is expected to raise."""
    dotless_url = "http://wwwgooglecom/"
    with pytest.raises(Exception):
        waybackpy.Url(dotless_url, user_agent)
Exemple #7
0
def test_clean_url():
    """Check ``_clean_url`` on a URL with outer and inner spaces.

    For this input the expected result has the surrounding spaces removed
    and the inner space replaced by an underscore.
    """
    raw = " https://en.wikipedia.org/wiki/Network security "
    expected = "https://en.wikipedia.org/wiki/Network_security"
    cleaned = waybackpy.Url(raw, user_agent)._clean_url()
    assert expected == cleaned
Exemple #8
0
def test_get():
    """Check that ``get`` on the oldest google.com archive has the banner text."""
    google = waybackpy.Url("google.com", user_agent)
    oldest_archive = google.oldest()
    fetched = google.get(oldest_archive)
    assert "Welcome to Google" in fetched
Exemple #9
0
def test_newest():
    """Check that the result of ``newest`` contains the requested address."""
    repo = "github.com/akamhy/waybackpy"
    latest = waybackpy.Url(repo, user_agent).newest()
    assert repo in latest
Exemple #10
0
def test_oldest():
    """Check that ``oldest`` returns the archive with the known timestamp."""
    repo = "github.com/akamhy/waybackpy"
    earliest = waybackpy.Url(repo, user_agent).oldest()
    # Digit string expected to appear in the oldest archive's result.
    assert "20200504141153" in earliest
Exemple #11
0
def test_archive_url():
    """Check that ``archive_url`` mentions the requested repository path."""
    repo = "github.com/akamhy/waybackpy"
    wayback = waybackpy.Url(repo, user_agent)
    archive_text = str(wayback.archive_url)
    assert "github.com/akamhy" in archive_text
Exemple #12
0
def test_json():
    """Check that the ``JSON`` attribute includes an ``archived_snapshots`` key."""
    repo = "github.com/akamhy/waybackpy"
    wayback = waybackpy.Url(repo, user_agent)
    json_text = str(wayback.JSON)
    assert "archived_snapshots" in json_text