Example #1
0
 def test_snapshot_index(self):
     """Check the dol.gov snapshot listing and its ``to_date`` filtering."""
     target = "dol.gov"
     everything = waybackpack.search(target)
     assert len(everything) > 0
     # The first snapshot returned is expected to carry this timestamp.
     leading = everything[0]
     assert leading["timestamp"] == "19961102145216"
     # Capping the search at 1996 must yield fewer snapshots than the
     # unrestricted search, and exactly five of them.
     through_1996 = waybackpack.search(target, to_date="1996")
     assert len(through_1996) < len(everything)
     assert len(through_1996) == 5
Example #2
0
 def test_snapshot_index(self):
     """Check the dol.gov snapshot listing and its ``to_date`` filtering."""
     target = "dol.gov"
     everything = waybackpack.search(target)
     assert len(everything) > 0
     # The first snapshot returned is expected to carry this timestamp.
     leading = everything[0]
     assert leading["timestamp"] == "19961102145216"
     # Capping the search at 1996 must yield fewer snapshots than the
     # unrestricted search, and exactly four of them.
     through_1996 = waybackpack.search(target, to_date="1996")
     assert len(through_1996) < len(everything)
     assert len(through_1996) == 4
Example #3
0
 def test_basic(self):
     """Download the dol.gov snapshots found up to 1996 into a temp dir.

     The pack is built directly from the search results (``snapshots=``).
     The test passes when the download completes without raising.
     """
     url = "dol.gov"
     snapshots = waybackpack.search(url, to_date=1996)
     pack = waybackpack.Pack(url, snapshots=snapshots)
     dirpath = tempfile.mkdtemp()
     try:
         pack.download_to(dirpath)
     finally:
         # mkdtemp() leaves deletion to the caller; clean up even when the
         # download raises so a failing run does not leave files behind.
         shutil.rmtree(dirpath)
Example #4
0
 def test_basic(self):
     """Download the dol.gov snapshots found up to 1996 into a temp dir.

     The pack is built directly from the search results (``snapshots=``).
     The test passes when the download completes without raising.
     """
     url = "dol.gov"
     snapshots = waybackpack.search(url, to_date=1996)
     pack = waybackpack.Pack(url, snapshots=snapshots)
     dirpath = tempfile.mkdtemp()
     try:
         pack.download_to(dirpath)
     finally:
         # mkdtemp() leaves deletion to the caller; clean up even when the
         # download raises so a failing run does not leave files behind.
         shutil.rmtree(dirpath)
Example #5
0
 def test_basic(self):
     """Fetch the first dol.gov snapshot and check the returned content."""
     target = "dol.gov"
     results = waybackpack.search(target)
     asset = waybackpack.Asset(results[0])
     body = asset.fetch()
     # The fetched content must contain this phrase and be non-empty.
     assert b"Regulatory Information" in body
     assert len(body) > 0
Example #6
0
 def test_basic(self):
     """Fetch the first dol.gov snapshot and check the returned content."""
     target = "dol.gov"
     results = waybackpack.search(target)
     asset = waybackpack.Asset(results[0])
     body = asset.fetch()
     # The fetched content must contain this phrase and be non-empty.
     assert b"Regulatory Information" in body
     assert len(body) > 0
Example #7
0
 def test_basic(self):
     """Download the dol.gov snapshots found up to 1996 into a temp dir.

     The pack is built from the timestamps of the search results. The
     test passes when the download completes without raising.
     """
     url = "dol.gov"
     snapshots = waybackpack.search(url, to_date=1996)
     timestamps = [snap["timestamp"] for snap in snapshots]
     pack = waybackpack.Pack(url, timestamps)
     dirpath = tempfile.mkdtemp()
     try:
         pack.download_to(dirpath)
     finally:
         # mkdtemp() leaves deletion to the caller; clean up even when the
         # download raises so a failing run does not leave files behind.
         shutil.rmtree(dirpath)
Example #8
0
 def test_basic(self):
     """Download the www.dol.gov snapshots found up to 1996 into a temp dir.

     The pack is built from the timestamps of the search results. The
     test passes when the download completes without raising.
     """
     url = "http://www.dol.gov/"
     snapshots = waybackpack.search(url, to_date=1996)
     timestamps = [snap["timestamp"] for snap in snapshots]
     pack = waybackpack.Pack(url, timestamps)
     dirpath = tempfile.mkdtemp()
     try:
         pack.download_to(dirpath)
     finally:
         # mkdtemp() leaves deletion to the caller; clean up even when the
         # download raises so a failing run does not leave files behind.
         shutil.rmtree(dirpath)
Example #9
0
 def test_basic(self):
     """Fetch the asset for the first dol.gov timestamp and check it."""
     target = "dol.gov"
     results = waybackpack.search(target)
     stamps = [entry["timestamp"] for entry in results]
     asset = waybackpack.Asset(target, stamps[0])
     body = asset.fetch()
     # The fetched content must contain this phrase and be non-empty.
     assert b"Regulatory Information" in body
     assert len(body) > 0
Example #10
0
 def test_basic(self):
     """Fetch the asset for the first dol.gov timestamp and check it."""
     target = "dol.gov"
     results = waybackpack.search(target)
     stamps = [entry["timestamp"] for entry in results]
     asset = waybackpack.Asset(target, stamps[0])
     body = asset.fetch()
     # The fetched content must contain this phrase and be non-empty.
     assert b"Regulatory Information" in body
     assert len(body) > 0
Example #11
0
 def test_uniques(self):
     """A uniques-only search of dol.gov up to 1996 returns two entries."""
     target = "dol.gov"
     distinct = waybackpack.search(target, to_date="1996", uniques_only=True)
     assert len(distinct) == 2
Example #12
0
 def test_basic(self):
     """Fetch the first www.dol.gov asset through a redirect-following session."""
     target = "http://www.dol.gov/"
     results = waybackpack.search(target)
     stamps = [entry["timestamp"] for entry in results]
     asset = waybackpack.Asset(target, stamps[0])
     # The session is created with follow_redirects=True and handed to fetch().
     body = asset.fetch(session=waybackpack.Session(follow_redirects=True))
     assert b"Regulatory Information" in body
     assert len(body) > 0
Example #13
0
    def test_empty_result(self):
        """An empty search result must yield a pack with no timestamps."""
        # The search starting at 2020 is expected to return nothing for URL.
        found = waybackpack.search(URL, from_date="2020")
        assert len(found) == 0

        empty_pack = waybackpack.Pack(URL, timestamps=found)
        assert len(empty_pack.timestamps) == 0
Example #14
0
def search_archive(url):
    """Return a ``waybackpack.Pack`` built from the unique captures of ``url``."""
    # Ask only for unique captures and keep just their timestamps.
    captures = waybackpack.search(url, uniques_only=True)
    stamps = [capture['timestamp'] for capture in captures]

    # Bundle the URL together with the collected timestamps.
    return waybackpack.Pack(url, timestamps=stamps)
Example #15
0
def search_archive(url):
    """Build a ``waybackpack.Pack`` for ``url`` from its unique captures."""
    # Search with uniques_only=True, then pull the timestamp out of each hit.
    hits = waybackpack.search(url, uniques_only=True)
    capture_times = [hit['timestamp'] for hit in hits]

    # The pack is constructed from the URL and those timestamps.
    return waybackpack.Pack(url, timestamps=capture_times)
Example #16
0
 def test_no_clobber(self):
     """Check that a repeat download with ``no_clobber=True`` skips fetches.

     The same timestamps are downloaded twice into one directory. For the
     second pass every asset's ``fetch`` is replaced by a mock, and the
     total number of mock calls must be lower than the number of assets.
     """
     url = "http://whitehouse.gov/"
     snapshots = waybackpack.search(
         url,
         to_date=20010510,
         from_date=20010501,
     )
     timestamps = [snap["timestamp"] for snap in snapshots]
     pack = waybackpack.Pack(url, timestamps)
     dirpath = tempfile.mkdtemp()
     try:
         # First pass writes into the directory.
         pack.download_to(dirpath, no_clobber=True)
         # Second pass uses a fresh pack whose fetch calls are counted by
         # mocks.
         pack = waybackpack.Pack(url, timestamps)
         for asset in pack.assets:
             asset.fetch = MagicMock(return_value=b"asdfasdf")
         pack.download_to(dirpath, no_clobber=True)
         fetch_calls = sum(asset.fetch.call_count for asset in pack.assets)
         self.assertTrue(fetch_calls < len(pack.assets))
     finally:
         # mkdtemp() leaves deletion to the caller; clean up even when a
         # download or the assertion fails.
         shutil.rmtree(dirpath)
Example #17
0
 def test_uniques(self):
     """A uniques-only search of dol.gov up to 1996 returns two entries."""
     target = "dol.gov"
     distinct = waybackpack.search(
         target,
         to_date="1996",
         uniques_only=True,
     )
     assert len(distinct) == 2