def test_basic(self):
    """Smoke-test searching for snapshots and downloading them as a pack.

    Searches for snapshots of ``dol.gov`` with ``to_date=1996``, builds a
    ``waybackpack.Pack`` from the results and downloads it into a scratch
    directory.  There are no explicit assertions: the test passes as long
    as none of the calls raises.
    """
    url = "dol.gov"
    snapshots = waybackpack.search(url, to_date=1996)
    pack = waybackpack.Pack(url, snapshots=snapshots)
    dirpath = tempfile.mkdtemp()
    try:
        pack.download_to(dirpath)
    finally:
        # mkdtemp() leaves cleanup to the caller; remove the directory even
        # when the download fails so failing runs do not litter the disk.
        shutil.rmtree(dirpath)
def test_no_clobber(self):
    """Check that a repeated ``no_clobber=True`` download skips some fetches.

    The same set of timestamps is downloaded twice into one directory.
    For the second pass every asset's ``fetch`` is replaced by a mock, and
    the test asserts that the total number of ``fetch`` calls is strictly
    smaller than the number of assets in the pack (presumably because
    files written by the first pass are left alone -- confirm against
    ``Pack.download_to``).
    """
    url = "http://whitehouse.gov/"
    snapshots = waybackpack.search(url, to_date=20010510, from_date=20010501)
    timestamps = [snap["timestamp"] for snap in snapshots]

    pack = waybackpack.Pack(url, timestamps)
    dirpath = tempfile.mkdtemp()
    try:
        # First pass: populate the directory.
        pack.download_to(dirpath, no_clobber=True)

        # Second pass: a fresh pack whose assets record fetch calls instead
        # of performing them.
        pack = waybackpack.Pack(url, timestamps)
        for asset in pack.assets:
            asset.fetch = MagicMock(return_value=b"asdfasdf")
        pack.download_to(dirpath, no_clobber=True)

        fetch_calls = sum(asset.fetch.call_count for asset in pack.assets)
        # assertLess reports both operands on failure, unlike assertTrue.
        self.assertLess(fetch_calls, len(pack.assets))
    finally:
        # Clean up even when a download or the assertion fails.
        shutil.rmtree(dirpath)
def test_basic(self):
    """Smoke-test searching for snapshots and downloading them as a pack.

    Searches for snapshots of ``http://www.dol.gov/`` with ``to_date=1996``,
    collects the ``"timestamp"`` entry of each result, builds a
    ``waybackpack.Pack`` from those timestamps and downloads it into a
    scratch directory.  There are no explicit assertions: the test passes
    as long as none of the calls raises.
    """
    url = "http://www.dol.gov/"
    snapshots = waybackpack.search(url, to_date=1996)
    timestamps = [snap["timestamp"] for snap in snapshots]
    pack = waybackpack.Pack(url, timestamps)
    dirpath = tempfile.mkdtemp()
    try:
        pack.download_to(dirpath)
    finally:
        # mkdtemp() leaves cleanup to the caller; remove the directory even
        # when the download fails so failing runs do not litter the disk.
        shutil.rmtree(dirpath)
def test_empty_result(self): timestamps = waybackpack.search(URL, from_date = "2020") assert(len(timestamps) == 0) pack = waybackpack.Pack( URL, timestamps=timestamps, ) assert(len(pack.timestamps) == 0)
def search_archive(url):
    """Return a ``waybackpack.Pack`` built from the unique captures of *url*.

    Runs ``waybackpack.search`` with ``uniques_only=True``, takes the
    ``'timestamp'`` entry of every result, and passes that list to
    ``waybackpack.Pack`` as its ``timestamps`` argument.
    """
    captures = waybackpack.search(url, uniques_only=True)
    # One timestamp per capture, in the order the search returned them.
    capture_times = [capture['timestamp'] for capture in captures]
    return waybackpack.Pack(url, timestamps=capture_times)