Ejemplo n.º 1
0
 def test_basic(self):
     """Download the snapshots found for ``dol.gov`` into a scratch directory.

     Uses whatever ``waybackpack.search`` returns for the URL with
     ``to_date=1996``. There is no explicit assertion: the test passes
     when the search, the pack construction, and the download all
     complete without raising.
     """
     url = "dol.gov"
     snapshots = waybackpack.search(url, to_date=1996)
     pack = waybackpack.Pack(url, snapshots=snapshots)
     dirpath = tempfile.mkdtemp()
     try:
         pack.download_to(dirpath)
     finally:
         # Clean up even when the download raises, so a failing run
         # does not leave the scratch directory behind.
         shutil.rmtree(dirpath)
Ejemplo n.º 2
0
 def test_no_clobber(self):
     """Check that a repeated ``no_clobber=True`` download skips fetches.

     The same timestamps are downloaded twice into one directory. On
     the second pass every asset's ``fetch`` is replaced by a mock so
     the calls can be counted; the test requires that fewer fetches
     happen than there are assets.
     """
     url = "http://whitehouse.gov/"
     snapshots = waybackpack.search(url,
                                    to_date=20010510,
                                    from_date=20010501)
     timestamps = [snap["timestamp"] for snap in snapshots]
     pack = waybackpack.Pack(url, timestamps)
     dirpath = tempfile.mkdtemp()
     try:
         # First pass: real download (presumably leaves files in dirpath
         # that the second pass can detect -- confirm against Pack).
         pack.download_to(dirpath, no_clobber=True)

         # Second pass: fresh pack over the same directory, with fetch
         # mocked out so no network is hit and calls are countable.
         pack = waybackpack.Pack(url, timestamps)
         for asset in pack.assets:
             asset.fetch = MagicMock(return_value=b"asdfasdf")
         pack.download_to(dirpath, no_clobber=True)

         fetch_calls = sum(asset.fetch.call_count for asset in pack.assets)
         # assertLess reports both numbers on failure, unlike assertTrue.
         self.assertLess(fetch_calls, len(pack.assets))
     finally:
         # Remove the scratch directory even if a download or the
         # assertion fails.
         shutil.rmtree(dirpath)
Ejemplo n.º 3
0
 def test_basic(self):
     """Download the snapshots found for ``http://www.dol.gov/``.

     Searches with ``to_date=1996``, builds a pack from the ``timestamp``
     field of each result, and downloads it into a scratch directory.
     There is no explicit assertion: the test passes when every step
     completes without raising.
     """
     url = "http://www.dol.gov/"
     snapshots = waybackpack.search(url, to_date=1996)
     timestamps = [snap["timestamp"] for snap in snapshots]
     pack = waybackpack.Pack(url, timestamps)
     dirpath = tempfile.mkdtemp()
     try:
         pack.download_to(dirpath)
     finally:
         # Clean up even when the download raises, so a failing run
         # does not leave the scratch directory behind.
         shutil.rmtree(dirpath)
Ejemplo n.º 4
0
    def test_empty_result(self):
        """An empty search result must yield a pack with no timestamps."""
        # NOTE(review): this relies on the search returning nothing for
        # from_date="2020"; presumably that date was in the future when
        # the test was written -- confirm it still holds.
        no_hits = waybackpack.search(URL, from_date="2020")
        assert len(no_hits) == 0

        # Building a pack from the empty result should neither fail nor
        # invent timestamps.
        empty_pack = waybackpack.Pack(URL, timestamps=no_hits)
        assert len(empty_pack.timestamps) == 0
Ejemplo n.º 5
0
def search_archive(url):
    """Build a ``waybackpack.Pack`` from the unique captures of ``url``.

    Runs ``waybackpack.search`` with ``uniques_only=True``, collects the
    ``'timestamp'`` entry of every result, and returns a ``Pack``
    constructed from the URL and those timestamps.
    """
    captures = waybackpack.search(url, uniques_only=True)

    # One timestamp per capture, in the order the search returned them.
    capture_times = [capture['timestamp'] for capture in captures]

    return waybackpack.Pack(url, timestamps=capture_times)