def test_snapshot_index(self):
    """Check the dol.gov snapshot index, both unclipped and clipped.

    The unrestricted search must return at least one snapshot, and the
    first one must carry the timestamp ``19961102145216``. Searching again
    with ``to_date="1996"`` must return fewer snapshots -- exactly 5.
    """
    target = "dol.gov"

    # Unrestricted search: every snapshot the index reports for the target.
    everything = waybackpack.search(target)
    assert len(everything) > 0
    earliest = everything[0]
    assert earliest["timestamp"] == "19961102145216"

    # Same search, clipped with to_date.
    through_1996 = waybackpack.search(target, to_date="1996")
    assert len(through_1996) < len(everything)
    assert len(through_1996) == 5
def test_snapshot_index(self):
    """Verify the dol.gov snapshot listing and its ``to_date`` clipping.

    Asserts that the full listing is non-empty and starts at timestamp
    ``19961102145216``, and that the listing limited by ``to_date="1996"``
    is shorter than the full one and has exactly 4 entries.
    """
    site = "dol.gov"
    full_listing = waybackpack.search(site)
    full_count = len(full_listing)
    assert full_count > 0
    assert full_listing[0]["timestamp"] == "19961102145216"

    clipped_listing = waybackpack.search(site, to_date="1996")
    clipped_count = len(clipped_listing)
    # Re-measure the full listing here, as the original comparison did.
    assert clipped_count < len(full_listing)
    assert clipped_count == 4
def test_basic(self):
    """Download the dol.gov snapshots found with ``to_date=1996``.

    A ``waybackpack.Pack`` is built from the search results (passed as
    ``snapshots=``) and downloaded into a freshly created temporary
    directory. The test passes when the download completes without
    raising.
    """
    url = "dol.gov"
    snapshots = waybackpack.search(url, to_date=1996)
    pack = waybackpack.Pack(url, snapshots=snapshots)
    dirpath = tempfile.mkdtemp()
    try:
        pack.download_to(dirpath)
    finally:
        # mkdtemp() leaves cleanup to the caller; remove the directory even
        # when the download raises so failing runs do not leak temp files.
        shutil.rmtree(dirpath)
def test_basic(self):
    """Fetch the first dol.gov snapshot and check what comes back.

    The asset is built directly from the first search result. The fetched
    bytes must contain ``Regulatory Information`` and must be non-empty.
    """
    results = waybackpack.search("dol.gov")
    earliest_asset = waybackpack.Asset(results[0])
    body = earliest_asset.fetch()
    assert b"Regulatory Information" in body
    assert len(body) > 0
def test_basic(self):
    """Check that the earliest dol.gov capture can be fetched.

    Wraps the first search result in a ``waybackpack.Asset``, fetches it,
    and asserts that the payload includes ``Regulatory Information`` and
    has a length greater than zero.
    """
    site = "dol.gov"
    captures = waybackpack.search(site)
    first_capture = captures[0]
    payload = waybackpack.Asset(first_capture).fetch()
    assert b"Regulatory Information" in payload
    assert len(payload) > 0
def test_basic(self):
    """Download the dol.gov snapshots found with ``to_date=1996``.

    The timestamps of the search results are collected into a list, a
    ``waybackpack.Pack`` is built from the URL and those timestamps, and
    the pack is downloaded into a freshly created temporary directory.
    The test passes when the download completes without raising.
    """
    url = "dol.gov"
    snapshots = waybackpack.search(url, to_date=1996)
    timestamps = [snap["timestamp"] for snap in snapshots]
    pack = waybackpack.Pack(url, timestamps)
    dirpath = tempfile.mkdtemp()
    try:
        pack.download_to(dirpath)
    finally:
        # Always remove the scratch directory, including when download_to
        # raises; mkdtemp() never cleans up on its own.
        shutil.rmtree(dirpath)
def test_basic(self):
    """Download the http://www.dol.gov/ snapshots found with ``to_date=1996``.

    Builds a ``waybackpack.Pack`` from the URL and the timestamps of the
    search results, then downloads it into a temporary directory. The test
    passes when the download completes without raising.
    """
    url = "http://www.dol.gov/"
    snapshots = waybackpack.search(url, to_date=1996)
    timestamps = [snap["timestamp"] for snap in snapshots]
    pack = waybackpack.Pack(url, timestamps)
    dirpath = tempfile.mkdtemp()
    try:
        pack.download_to(dirpath)
    finally:
        # Clean up on every exit path so a failed download does not leave
        # the temporary directory behind.
        shutil.rmtree(dirpath)
def test_basic(self):
    """Fetch the first dol.gov capture by URL and timestamp.

    Collects the timestamp of every search result, builds an Asset from
    the URL and the first timestamp, and asserts that the fetched bytes
    contain ``Regulatory Information`` and are non-empty.
    """
    site = "dol.gov"
    capture_times = [
        capture["timestamp"] for capture in waybackpack.search(site)
    ]
    earliest = waybackpack.Asset(site, capture_times[0])
    body = earliest.fetch()
    assert b"Regulatory Information" in body
    assert len(body) > 0
def test_basic(self):
    """Check that the earliest dol.gov asset fetches with expected content.

    The Asset is addressed by URL plus the first timestamp from the search
    results. Its fetched content must include ``Regulatory Information``
    and have a length greater than zero.
    """
    target = "dol.gov"
    found = waybackpack.search(target)
    stamps = [entry["timestamp"] for entry in found]
    first_stamp = stamps[0]
    fetched = waybackpack.Asset(target, first_stamp).fetch()
    assert b"Regulatory Information" in fetched
    assert len(fetched) > 0
def test_uniques(self):
    """Check that a uniques-only search of dol.gov up to 1996 yields 2 results."""
    search_options = {"to_date": "1996", "uniques_only": True}
    distinct = waybackpack.search("dol.gov", **search_options)
    assert len(distinct) == 2
def test_basic(self):
    """Fetch the first http://www.dol.gov/ capture through a Session.

    The Asset is built from the URL and the first search-result timestamp,
    and is fetched using a ``waybackpack.Session`` created with
    ``follow_redirects=True``. The content must contain
    ``Regulatory Information`` and be non-empty.
    """
    site = "http://www.dol.gov/"
    capture_times = [
        capture["timestamp"] for capture in waybackpack.search(site)
    ]
    earliest = waybackpack.Asset(site, capture_times[0])
    redirecting_session = waybackpack.Session(follow_redirects=True)
    body = earliest.fetch(session=redirecting_session)
    assert b"Regulatory Information" in body
    assert len(body) > 0
def test_empty_result(self):
    """Check that an empty search result produces an empty Pack.

    Searches the module-level ``URL`` with ``from_date="2020"``, asserts
    that nothing is returned, then builds a Pack from that empty result
    and asserts that its ``timestamps`` attribute is empty as well.
    """
    found = waybackpack.search(URL, from_date="2020")
    assert len(found) == 0

    empty_pack = waybackpack.Pack(URL, timestamps=found)
    assert len(empty_pack.timestamps) == 0
def search_archive(url):
    """Return a ``waybackpack.Pack`` for the unique captures of *url*.

    Runs a search with ``uniques_only=True``, collects the ``timestamp``
    field of each result, and builds the Pack from the URL and those
    timestamps.
    """
    # Unique captures only, as reported by the search.
    captures = waybackpack.search(url, uniques_only=True)
    # One timestamp per capture, in the order the search returned them.
    capture_times = [capture['timestamp'] for capture in captures]
    return waybackpack.Pack(url, timestamps=capture_times)
def search_archive(url):
    """Look up the unique captures of *url* and wrap them in a Pack.

    The search is restricted with ``uniques_only=True``. Each result's
    ``timestamp`` value is gathered into a list, which is handed to
    ``waybackpack.Pack`` together with the URL. The Pack is returned.
    """
    unique_hits = waybackpack.search(url, uniques_only=True)
    # Gather the timestamps, preserving the search order.
    stamps = [hit['timestamp'] for hit in unique_hits]
    archive_pack = waybackpack.Pack(url, timestamps=stamps)
    return archive_pack
def test_no_clobber(self):
    """Check that a second ``no_clobber`` download makes fewer fetches.

    Snapshots of http://whitehouse.gov/ between 20010501 and 20010510 are
    downloaded once into a temporary directory with ``no_clobber=True``.
    A second Pack over the same timestamps then has every asset's
    ``fetch`` replaced by a ``MagicMock`` and is downloaded into the same
    directory. The test asserts that the total number of mocked ``fetch``
    calls is smaller than the number of assets.
    """
    url = "http://whitehouse.gov/"
    snapshots = waybackpack.search(url, to_date=20010510, from_date=20010501)
    timestamps = [snap["timestamp"] for snap in snapshots]
    pack = waybackpack.Pack(url, timestamps)
    dirpath = tempfile.mkdtemp()
    try:
        # First pass: populate the directory with real downloads.
        pack.download_to(dirpath, no_clobber=True)

        # Second pass: a fresh pack whose fetches are mocked so calls can
        # be counted without touching the network.
        pack = waybackpack.Pack(url, timestamps)
        for asset in pack.assets:
            asset.fetch = MagicMock(return_value=b"asdfasdf")
        pack.download_to(dirpath, no_clobber=True)

        fetch_calls = sum(asset.fetch.call_count for asset in pack.assets)
        self.assertTrue(fetch_calls < len(pack.assets))
    finally:
        # Remove the directory even when a download or the assertion
        # fails; otherwise each failing run leaks a temp directory.
        shutil.rmtree(dirpath)
def test_uniques(self):
    """Verify that dol.gov has exactly 2 unique snapshots up to 1996."""
    site = "dol.gov"
    unique_snapshots = waybackpack.search(
        site,
        to_date="1996",
        uniques_only=True,
    )
    unique_count = len(unique_snapshots)
    assert unique_count == 2