def test_untar(self):
    """Files extracted from a gzipped tar must be byte-identical to the
    files that were archived, as verified by their SHA256 checksums."""
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = pathlib.Path(tmpdir)
        test_files = ["foo", "bar", "baz"]
        sha_list = []
        for name in test_files:
            path = tmpdir / name
            with open(path, "wb") as f:
                f.write(name.encode())
            # Record checksums for later.
            sha_list.append(utils.sha256(path))
        tarball = tmpdir / "test.tgz"
        # Add members by relative name so the archive has no tmpdir prefix.
        with utils.cd(tmpdir):
            with tarfile.open(tarball, mode="w:gz") as tf:
                for name in test_files:
                    tf.add(name)
        dest = tmpdir / "dest"
        dest.mkdir()
        utils.untar(tarball, dest)
        for name, sha in zip(test_files, sha_list):
            extracted = dest / name
            self.assertTrue(extracted.exists())
            # Check that extracted files have the same checksums as
            # the files we put in the tar.
            self.assertEqual(utils.sha256(extracted), sha)
def test_sha256_small(self):
    """utils.sha256 of a short file matches a known-good digest."""
    with tempfile.TemporaryDirectory() as tmpdir:
        path = pathlib.Path(tmpdir) / "test.foo"
        # Write binary content, to avoid windows/mac/unix line ending
        # differences.
        path.write_bytes(b"foo-bar-baz")
        self.assertEqual(
            utils.sha256(path),
            # Calculated with `sha256sum` from GNU coreutils.
            "269dce1a5bb90188b2d9cf542a7c30e410c7d8251e34a97bfea56062df51ae23",
        )
def test_sha256_big(self):
    """utils.sha256 of a multi-megabyte file matches a known-good digest.

    The file content is the decimal representations of 0..1048575
    concatenated with no separator.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        filename = pathlib.Path(tmpdir) / "test.foo"
        # Write a binary file, to avoid windows/mac/unix line ending
        # differences. Build the content in memory and write it with a
        # single call: a million tiny f.write() calls is needlessly slow,
        # and the resulting bytes are identical.
        with open(filename, "wb") as f:
            f.write("".join(map(str, range(1024 * 1024))).encode())
        sha256sum = utils.sha256(filename)
        self.assertEqual(
            sha256sum,
            # Calculated with `sha256sum` from GNU coreutils.
            "995e0fde646f7dc98423af9a862be96014574bfa76be1186b484f796c4e58533",
        )
def setUpModule():
    """Download every annotation file into a local test cache (once), verify
    its checksum, then point each annotation's URL at the local copy so the
    tests never hit the network."""
    destination = pathlib.Path("_test_cache/zipfiles/")
    for an in stdpopsim.all_annotations():
        key = an._cache.cache_path
        local_file = destination / key
        if not local_file.exists():
            local_file.parent.mkdir(exist_ok=True, parents=True)
            print("Downloading", an.zarr_url)
            utils.download(an.zarr_url, local_file)
        # This assertion could fail if we update a file on AWS,
        # or a developer creates a new annotation with the wrong checksum
        # (in the latter case, this should at least be caught during CI tests).
        assert utils.sha256(local_file) == an.zarr_sha256, (
            f"SHA256 for {local_file} doesn't match the SHA256 for "
            f"{an.id}. If you didn't add this SHA256 yourself, "
            f"try deleting {local_file} and restarting the tests.")
        # Remember the real URL so it can be restored later, then rewrite
        # the annotation to fetch from the local file instead.
        saved_urls[key] = an.zarr_url
        an.zarr_url = local_file.resolve().as_uri()
        an._cache.url = an.zarr_url
def setUpModule():
    """Download every genetic map tarball into a local test cache (once),
    verify its checksum, then point each map's URL at the local copy so the
    tests never hit the network."""
    destination = pathlib.Path("_test_cache/tarballs")
    for genetic_map in stdpopsim.all_genetic_maps():
        key = genetic_map.id
        local_file = destination / (key + ".tar.gz")
        if not local_file.exists():
            local_file.parent.mkdir(exist_ok=True, parents=True)
            print("Downloading", genetic_map.url)
            utils.download(genetic_map.url, local_file)
        # This assertion could fail if we update a file on AWS,
        # or a developer creates a new genetic map with the wrong checksum
        # (in the latter case, this should at least be caught during CI tests).
        assert utils.sha256(local_file) == genetic_map.sha256, (
            f"SHA256 for {local_file} doesn't match the SHA256 for "
            f"{genetic_map.id}. If you didn't add this SHA256 yourself, "
            f"try deleting {local_file} and restarting the tests.")
        # Remember the real URL so it can be restored later, then rewrite
        # the genetic map to fetch from the local file instead.
        saved_urls[key] = genetic_map.url
        genetic_map.url = local_file.resolve().as_uri()
        genetic_map._cache.url = genetic_map.url
def test_caching(self):
    """Exercise the CachedData download/validate cycle, with and without
    extraction: a fresh cache is invalid, a download makes it valid, a
    checksum mismatch aborts the download and clears the cache."""
    for extract in (True, False):
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = pathlib.Path(tmpdir)
            with utils.cd(tmpdir):
                filename = "test.foo"
                with open(filename, "w") as f:
                    print("foo", file=f)
                tar = tmpdir / "test.tgz"
                with tarfile.open(tar, "w:gz") as tf:
                    tf.add(filename)
                sha256 = utils.sha256(tar)
                cache = stdpopsim.CachedData(
                    namespace="test",
                    url=tar.resolve().as_uri(),
                    sha256=sha256,
                    extract=extract,
                )
                # Nothing downloaded yet.
                self.assertFalse(cache.is_cached())
                self.assertFalse(cache.is_valid())
                cache.download()
                self.assertTrue(cache.is_cached())
                self.assertTrue(cache.is_valid())
                # Try to download with incorrect checksum.
                cache.sha256 = "1234"
                self.assertTrue(cache.is_cached())
                self.assertFalse(cache.is_valid())
                with self.assertRaises(ValueError):
                    # checksum mismatch
                    cache.download()
                # The failed download must leave no cached data behind.
                self.assertFalse(cache.is_cached())
                self.assertFalse(cache.is_valid())
                # Fix the checksum and download again.
                cache.sha256 = sha256
                cache.download()
                self.assertTrue(cache.is_cached())
                self.assertTrue(cache.is_valid())
def test_multiple_threads_downloading(self):
    """A download that believes another download is already in progress
    should emit a UserWarning rather than fail."""
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = pathlib.Path(tmpdir)
        with utils.cd(tmpdir):
            filename = "test.foo"
            with open(filename, "w") as f:
                print("foo", file=f)
            tar = tmpdir / "test.tgz"
            with tarfile.open(tar, "w:gz") as tf:
                tf.add(filename)
            cache = stdpopsim.CachedData(
                namespace="test",
                url=tar.resolve().as_uri(),
                sha256=utils.sha256(tar),
                extract=True,
            )
            cache.download()
            # Trick the download code into thinking there's several
            # downloads happening concurrently.
            cache.is_cached = lambda: False
            with self.assertWarns(UserWarning):
                cache.download()