def test_tar_path_traversal_attack(self):
    # Test for vulnerability to path-traversal attacks: utils.untar() is
    # expected to raise ValueError for each crafted tarball below.
    escaping_paths = ("../nonexistant", "/nonexistant")
    with tempfile.TemporaryDirectory() as scratch:
        scratch = pathlib.Path(scratch)
        dest = scratch / "dest"
        dest.mkdir()

        # Case 1: the tarball contains a symlink whose target is one of
        # the escaping paths.
        archive = scratch / "symlink-path-traversal.tgz"
        link = scratch / "link"
        for target in escaping_paths:
            link.symlink_to(target)
            # Add the link from inside the scratch directory so that the
            # member is stored under its bare name.
            with utils.cd(scratch), tarfile.open(archive, mode="w:gz") as tf:
                tf.add("link")
            with self.assertRaises(ValueError):
                utils.untar(archive, dest)
            rm_f(link)
            rm_f(archive)

        # Case 2: the tarball contains a regular file whose recorded member
        # name is one of the escaping paths.
        archive = scratch / "path-traversal.tgz"
        payload = scratch / "test-thing"
        for member_name in escaping_paths:
            with open(payload, "w") as f:
                print("blah", file=f)

            def rename_member(info, new_name=member_name):
                # Overwrite the path the file will be extracted to.
                info.name = new_name
                return info

            with utils.cd(scratch), tarfile.open(archive, mode="w:gz") as tf:
                tf.add("test-thing", filter=rename_member)
            with self.assertRaises(ValueError):
                utils.untar(archive, dest)
            rm_f(payload)
            rm_f(archive)
def test_bad_tar_members(self):
    # Pretend we downloaded a tarball containing a FIFO or device file.
    # There is no reasonable use for these types of files in stdpopsim,
    # so their presence likely indicates a maliciously crafted tarball.
    # Creating a character or block special device file requires root
    # privileges, so we instead modify the ``type`` field of each file
    # in the tar.
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = pathlib.Path(tmpdir)
        dest = tmpdir / "dest"
        dest.mkdir()
        # These paths are the same for every member type, so build them once.
        tar = tmpdir / "irregular-type.tgz"
        filename = tmpdir / "irregular"
        for label, type_ in [
            ("fifo", tarfile.FIFOTYPE),
            ("char-device", tarfile.CHRTYPE),
            ("block-device", tarfile.BLKTYPE),
        ]:
            # Report the label on failure so it is clear which member type
            # was not rejected.
            with self.subTest(member_type=label):
                with open(filename, "w") as f:
                    print("blah", file=f)

                def filt(info, type_=type_):
                    info.type = type_  # lie about the type
                    return info

                with utils.cd(tmpdir):
                    with tarfile.open(tar, mode="w:gz") as tf:
                        tf.add("irregular", filter=filt)
                with self.assertRaises(ValueError):
                    utils.untar(tar, dest)
                rm_f(filename)
                rm_f(tar)
def test_untar(self):
    # Round-trip a few small files through a gzipped tar and utils.untar(),
    # then compare checksums of the extracted files with the originals.
    with tempfile.TemporaryDirectory() as scratch:
        scratch = pathlib.Path(scratch)
        member_names = ["foo", "bar", "baz"]

        # Each file's content is simply its own name; record the checksum
        # of every file as it is written.
        expected_sha = []
        for member in member_names:
            source = scratch / member
            with open(source, "wb") as out:
                out.write(member.encode())
            expected_sha.append(utils.sha256(source))

        archive = scratch / "test.tgz"
        # Add the files from inside the scratch directory so that they are
        # stored under their bare names.
        with utils.cd(scratch), tarfile.open(archive, mode="w:gz") as tf:
            for member in member_names:
                tf.add(member)

        dest = scratch / "dest"
        dest.mkdir()
        utils.untar(archive, dest)

        for member, sha in zip(member_names, expected_sha):
            extracted = dest / member
            self.assertTrue(extracted.exists())
            # The extracted file must have the same checksum as the file
            # we put in the tar.
            self.assertEqual(utils.sha256(extracted), sha)
def get_genetic_map_tarball():
    """
    Returns a genetic map in hapmap format in a tarball as a bytes object.

    The gzip-compressed tarball holds nine members, ``prefix_chr1.txt``
    to ``prefix_chr9.txt``, stored under their bare file names in sorted
    order. Every member has the same header line and the same three rows.
    """
    with tempfile.TemporaryDirectory() as map_dir:
        for j in range(1, 10):
            # TODO Have a way to put in different maps??
            with open(os.path.join(map_dir, "prefix_chr{}.txt".format(j)), "w") as f:
                print("Chromosome Position(bp) Rate(cM/Mb) Map(cM)", file=f)
                print("chr1 55550 2.981822 0.000000", file=f)
                print("chr1 82571 2.082414 0.080572", file=f)
                print("chr1 88169 0 0.092229", file=f)

        # Now tar up this map directory. For the tarfile to be in the right
        # format each member must be stored under its bare file name; passing
        # ``arcname`` achieves that without changing the process-wide working
        # directory.
        with tempfile.TemporaryFile("wb+") as tmp_file:
            with tarfile.open(fileobj=tmp_file, mode="w:gz") as tar_file:
                # os.listdir() order is arbitrary; sort so that the member
                # order is the same on every run.
                for filename in sorted(os.listdir(map_dir)):
                    tar_file.add(os.path.join(map_dir, filename), arcname=filename)
            # Read back the tarball
            tmp_file.seek(0)
            tarball = tmp_file.read()
    return tarball
def test_cd_context_manager(self):
    # On Mac, the path we enter with "cd" may differ from the path reported
    # by cwd() due to symlinks, so every path compared here is resolved
    # first to ignore symlink-only differences.
    def resolved_cwd():
        return pathlib.Path.cwd().resolve()

    starting_dir = resolved_cwd()
    with tempfile.TemporaryDirectory() as scratch:
        target_dir = pathlib.Path(scratch).resolve()
        self.assertNotEqual(starting_dir, target_dir)
        with utils.cd(target_dir):
            # Inside the context we must be in the requested directory.
            self.assertEqual(resolved_cwd(), target_dir)
        # After leaving the context we must be back where we started.
        self.assertEqual(resolved_cwd(), starting_dir)
def get_maps(self, tarball):
    """
    Extracts the given tarball (a bytes object) into a temporary directory
    and reads each extracted entry with ``msprime.RateMap.read_hapmap``.

    Returns a dict mapping each extracted file name to the value returned
    by ``read_hapmap`` for that file.
    """
    maps = {}
    with tempfile.TemporaryFile("wb+") as f:
        f.write(tarball)
        f.seek(0)
        with tarfile.open(fileobj=f, mode="r") as tar_file:
            with tempfile.TemporaryDirectory() as extract_dir:
                # Extract straight into the temporary directory and read the
                # files back by full path, rather than changing the
                # process-wide working directory to get relative names.
                tar_file.extractall(path=extract_dir)
                for fn in os.listdir(extract_dir):
                    maps[fn] = msprime.RateMap.read_hapmap(
                        os.path.join(extract_dir, fn)
                    )
    return maps
def test_caching(self):
    # Exercise the download / is_cached / is_valid cycle of CachedData,
    # both with and without extraction, using a local tarball as the URL.
    for extract in (True, False):
        with tempfile.TemporaryDirectory() as scratch:
            scratch = pathlib.Path(scratch)
            archive = scratch / "test.tgz"
            payload_name = "test.foo"
            # Build the tarball from inside the scratch directory so that
            # the member is stored under its bare name.
            with utils.cd(scratch):
                with open(payload_name, "w") as f:
                    print("foo", file=f)
                with tarfile.open(archive, "w:gz") as tf:
                    tf.add(payload_name)

            good_sha = utils.sha256(archive)
            cache = stdpopsim.CachedData(
                namespace="test",
                url=archive.resolve().as_uri(),
                sha256=good_sha,
                extract=extract,
            )

            # Nothing has been downloaded yet.
            self.assertFalse(cache.is_cached())
            self.assertFalse(cache.is_valid())

            cache.download()
            self.assertTrue(cache.is_cached())
            self.assertTrue(cache.is_valid())

            # try to download with incorrect checksum
            cache.sha256 = "1234"
            self.assertTrue(cache.is_cached())
            self.assertFalse(cache.is_valid())
            with self.assertRaises(ValueError):
                # checksum mismatch
                cache.download()
            self.assertFalse(cache.is_cached())
            self.assertFalse(cache.is_valid())

            # fix the checksum and download again
            cache.sha256 = good_sha
            cache.download()
            self.assertTrue(cache.is_cached())
            self.assertTrue(cache.is_valid())
def test_multiple_threads_downloading(self):
    # A second download() on a cache that reports itself as not cached is
    # expected to emit a UserWarning.
    with tempfile.TemporaryDirectory() as scratch:
        scratch = pathlib.Path(scratch)
        archive = scratch / "test.tgz"
        payload_name = "test.foo"
        # Build the tarball from inside the scratch directory so that the
        # member is stored under its bare name.
        with utils.cd(scratch):
            with open(payload_name, "w") as f:
                print("foo", file=f)
            with tarfile.open(archive, "w:gz") as tf:
                tf.add(payload_name)

        cache = stdpopsim.CachedData(
            namespace="test",
            url=archive.resolve().as_uri(),
            sha256=utils.sha256(archive),
            extract=True,
        )
        cache.download()

        # Trick the download code into thinking there's several happening
        # concurrently
        def never_cached():
            return False

        cache.is_cached = never_cached
        with self.assertWarns(UserWarning):
            cache.download()