def create_tarball(filename, root):
    """Create a tar.gz archive of docs in a directory."""
    files = dict(distribution_files(root))
    with open(filename, 'wb') as fh:
        create_tar_gz_from_files(fh, files,
                                 filename=os.path.basename(filename),
                                 compresslevel=6)

def stream_context_tar(topsrcdir, context_dir, out_file, image_name, args):
    """Like create_context_tar, but streams the tar file to the `out_file`
    file object."""
    archive_files = {}
    content = []

    context_dir = os.path.join(topsrcdir, context_dir)

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            archive_path = source_path[len(context_dir) + 1:]
            archive_files[archive_path] = source_path

    # Parse Dockerfile for special syntax of extra files to include.
    with open(os.path.join(context_dir, "Dockerfile"), "r") as fh:
        for line in fh:
            content.append(line)

            if not line.startswith("# %include"):
                continue

            p = line[len("# %include "):].strip()
            if os.path.isabs(p):
                raise Exception("extra include path cannot be absolute: %s" % p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            if not fs_path.startswith(topsrcdir):
                raise Exception("extra include path outside topsrcdir: %s" % p)

            if not os.path.exists(fs_path):
                raise Exception("extra include path does not exist: %s" % p)

            if os.path.isdir(fs_path):
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        rel = source_path[len(fs_path) + 1:]
                        archive_path = os.path.join("topsrcdir", p, rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join("topsrcdir", p)
                archive_files[archive_path] = fs_path

    archive_files["Dockerfile"] = GeneratedFile(
        "".join(content).encode("utf-8"))

    writer = HashingWriter(out_file)
    create_tar_gz_from_files(writer, archive_files, f"{image_name}.tar")
    return writer.hexdigest()

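# A minimal usage sketch for stream_context_tar above. The checkout path and
# image directory are hypothetical, and this assumes HashingWriter accepts
# any binary file-like object; the returned value is the hex digest that
# HashingWriter computes over the streamed bytes.
import io

out = io.BytesIO()
digest = stream_context_tar(
    topsrcdir='/builds/worker/checkouts/gecko',      # assumed checkout path
    context_dir='taskcluster/docker/image_builder',  # assumed, relative to topsrcdir
    out_file=out,
    image_name='image_builder',
    args={},
)
# `out` now holds the gzipped context tar; `digest` identifies its contents.
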
def test_tar_gz_name(self):
    d = tempfile.mkdtemp()
    try:
        files = self._create_files(d)

        gp = os.path.join(d, 'test.tar.gz')
        with open(gp, 'wb') as fh:
            create_tar_gz_from_files(fh, files, filename='foobar',
                                     compresslevel=1)

        self.assertEqual(file_hash(gp),
                         'fd099f96480cc1100f37baa8e89a6b820dbbcbd3')

        with tarfile.open(gp, 'r:gz') as tf:
            self._verify_basic_tarfile(tf)
    finally:
        shutil.rmtree(d)

def test_create_tar_gz_basic(self):
    d = tempfile.mkdtemp()
    try:
        files = self._create_files(d)

        gp = os.path.join(d, 'test.tar.gz')
        with open(gp, 'wb') as fh:
            create_tar_gz_from_files(fh, files)

        self.assertEqual(file_hash(gp),
                         'acb602239c1aeb625da5e69336775609516d60f5')

        with tarfile.open(gp, 'r:gz') as tf:
            self._verify_basic_tarfile(tf)
    finally:
        shutil.rmtree(d)

def test_tar_gz_name(self):
    d = tempfile.mkdtemp()
    try:
        files = self._create_files(d)

        gp = os.path.join(d, 'test.tar.gz')
        with open(gp, 'wb') as fh:
            create_tar_gz_from_files(fh, files, filename='foobar',
                                     compresslevel=1)

        self.assertEqual(file_hash(gp),
                         '1cc8b96f0262350977c2e9d61f40a1fa76f35c52')

        with tarfile.open(gp, 'r:gz') as tf:
            self._verify_basic_tarfile(tf)
    finally:
        shutil.rmtree(d)

def test_create_tar_gz_basic(self):
    d = tempfile.mkdtemp()
    try:
        files = self._create_files(d)

        gp = os.path.join(d, 'test.tar.gz')
        with open(gp, 'wb') as fh:
            create_tar_gz_from_files(fh, files)

        self.assertEqual(file_hash(gp),
                         '7c4da5adc5088cdf00911d5daf9a67b15de714b7')

        with tarfile.open(gp, 'r:gz') as tf:
            self._verify_basic_tarfile(tf)
    finally:
        shutil.rmtree(d)

def main(argv):
    parser = argparse.ArgumentParser(description='Produce test archives')
    parser.add_argument('archive', help='Which archive to generate')
    parser.add_argument('outputfile', help='File to write output to')

    args = parser.parse_args(argv)

    out_file = args.outputfile
    if not out_file.endswith(('.tar.gz', '.zip')):
        raise Exception('expected tar.gz or zip output file')

    file_count = 0
    t_start = time.time()
    ensureParentDir(out_file)
    res = find_files(args.archive)
    with open(out_file, 'wb') as fh:
        # Experimentation revealed that level 5 is significantly faster and
        # has marginally larger sizes than higher values and is the sweet
        # spot for optimal compression. Read the detailed commit message that
        # introduced this for raw numbers.
        if out_file.endswith('.tar.gz'):
            files = dict(res)
            create_tar_gz_from_files(fh, files, compresslevel=5)
            file_count = len(files)
        elif out_file.endswith('.zip'):
            with JarWriter(fileobj=fh, optimize=False,
                           compress_level=5) as writer:
                for p, f in res:
                    writer.add(p.encode('utf-8'), f.read(), mode=f.mode,
                               skip_duplicates=True)
                    file_count += 1
        else:
            raise Exception('unhandled file extension: %s' % out_file)

    duration = time.time() - t_start
    zip_size = os.path.getsize(args.outputfile)
    basename = os.path.basename(args.outputfile)
    print('Wrote %d files in %d bytes to %s in %.2fs' % (
        file_count, zip_size, basename, duration))

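# Hedged sketch of how main() above might be wired up as a script entry
# point; the archive key and output path in the example invocation are
# hypothetical, not taken from the source.
if __name__ == '__main__':
    import sys

    # e.g. `python make_test_archive.py common dist/test_archives/common.tar.gz`
    main(sys.argv[1:])
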
def test_tar_gz_name(self):
    d = tempfile.mkdtemp()
    try:
        files = self._create_files(d)

        gp = os.path.join(d, "test.tar.gz")
        with open(gp, "wb") as fh:
            create_tar_gz_from_files(fh, files, filename="foobar")

        # The archive bytes differ between Python <= 3.8 and newer
        # interpreters, so the expected digest is version-dependent.
        if sys.version_info.major == 3 and sys.version_info.minor <= 8:
            self.assertEqual(
                file_hash(gp), "721e00083c17d16df2edbddf40136298c06d0c49")
        else:
            self.assertEqual(
                file_hash(gp), "059916c8e6b6f22be774dc56aabb3819460fee53")

        with tarfile.open(gp, "r:gz") as tf:
            self._verify_basic_tarfile(tf)
    finally:
        shutil.rmtree(d)

def test_create_tar_gz_basic(self):
    d = tempfile.mkdtemp()
    try:
        files = self._create_files(d)

        gp = os.path.join(d, "test.tar.gz")
        with open(gp, "wb") as fh:
            create_tar_gz_from_files(fh, files)

        # As above, the expected digest depends on the Python version.
        if sys.version_info.major == 3 and sys.version_info.minor <= 8:
            self.assertEqual(
                file_hash(gp), "7c4da5adc5088cdf00911d5daf9a67b15de714b7")
        else:
            self.assertEqual(
                file_hash(gp), "766117873af9754915ee93c56243616d000d7e5e")

        with tarfile.open(gp, "r:gz") as tf:
            self._verify_basic_tarfile(tf)
    finally:
        shutil.rmtree(d)

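# The tests above rely on helpers that are not shown here. The expected
# digests are 40 hex characters, which suggests SHA-1; this is a sketch of
# what a file_hash helper along those lines could look like (an assumption,
# not the project's actual implementation):
import hashlib

def file_hash(path, chunk_size=65536):
    """Return the SHA-1 hex digest of a file's contents (sketch)."""
    h = hashlib.sha1()
    with open(path, 'rb') as fh:
        # Read in fixed-size chunks so large archives don't load into memory.
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()
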
def main(argv):
    parser = argparse.ArgumentParser(
        description='Produce archive of generated sources')
    parser.add_argument('outputfile', help='File to write output to')
    args = parser.parse_args(argv)

    objdir_abspath = mozpath.abspath(buildconfig.topobjdir)

    def is_valid_entry(entry):
        if isinstance(entry[1], BaseFile):
            entry_abspath = mozpath.abspath(entry[1].path)
        else:
            entry_abspath = mozpath.abspath(entry[1])
        if not entry_abspath.startswith(objdir_abspath):
            print("Warning: omitting generated source [%s] from archive"
                  % entry_abspath, file=sys.stderr)
            return False
        return True

    files = dict(filter(is_valid_entry, get_generated_sources()))
    with open(args.outputfile, 'wb') as fh:
        create_tar_gz_from_files(fh, files, compresslevel=5)

def stream_context_tar(topsrcdir, context_dir, out_file, prefix, args=None):
    """Like create_context_tar, but streams the tar file to the `out_file`
    file object."""
    archive_files = {}
    replace = []

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            rel = source_path[len(context_dir) + 1:]
            archive_path = os.path.join(prefix, rel)
            archive_files[archive_path] = source_path

    # Parse Dockerfile for special syntax of extra files to include.
    content = []
    with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
        for line in fh:
            if line.startswith('# %ARG'):
                p = line[len('# %ARG '):].strip()
                if not args or p not in args:
                    raise Exception('missing argument: {}'.format(p))
                replace.append((re.compile(r'\${}\b'.format(p)),
                                args[p].encode('ascii')))
                continue

            for regexp, s in replace:
                line = re.sub(regexp, s, line)

            content.append(line)

            if not line.startswith('# %include'):
                continue

            p = line[len('# %include '):].strip()
            if os.path.isabs(p):
                raise Exception('extra include path cannot be absolute: %s' % p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            if not fs_path.startswith(topsrcdir):
                raise Exception('extra include path outside topsrcdir: %s' % p)

            if not os.path.exists(fs_path):
                raise Exception('extra include path does not exist: %s' % p)

            if os.path.isdir(fs_path):
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        rel = source_path[len(fs_path) + 1:]
                        archive_path = os.path.join(prefix, 'topsrcdir', p, rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join(prefix, 'topsrcdir', p)
                archive_files[archive_path] = fs_path

    archive_files[os.path.join(prefix, 'Dockerfile')] = \
        GeneratedFile(b''.join(content))

    writer = HashingWriter(out_file)
    create_tar_gz_from_files(writer, archive_files, '%s.tar.gz' % prefix)
    return writer.hexdigest()

def stream_context_tar(topsrcdir, context_dir, out_file, prefix, args=None):
    """Like create_context_tar, but streams the tar file to the `out_file`
    file object."""
    archive_files = {}
    replace = []
    content = []

    context_dir = os.path.join(topsrcdir, context_dir)

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            rel = source_path[len(context_dir) + 1:]
            archive_path = os.path.join(prefix, rel)
            archive_files[archive_path] = source_path

    # Parse Dockerfile for special syntax of extra files to include.
    with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
        for line in fh:
            if line.startswith('# %ARG'):
                p = line[len('# %ARG '):].strip()
                if not args or p not in args:
                    raise Exception('missing argument: {}'.format(p))
                replace.append((re.compile(r'\${}\b'.format(p)),
                                args[p].encode('ascii')))
                continue

            for regexp, s in replace:
                line = re.sub(regexp, s, line)

            content.append(line)

            if not line.startswith('# %include'):
                continue

            p = line[len('# %include '):].strip()
            if os.path.isabs(p):
                raise Exception('extra include path cannot be absolute: %s' % p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            if not fs_path.startswith(topsrcdir):
                raise Exception('extra include path outside topsrcdir: %s' % p)

            if not os.path.exists(fs_path):
                raise Exception('extra include path does not exist: %s' % p)

            if os.path.isdir(fs_path):
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        rel = source_path[len(fs_path) + 1:]
                        archive_path = os.path.join(prefix, 'topsrcdir', p, rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join(prefix, 'topsrcdir', p)
                archive_files[archive_path] = fs_path

    archive_files[os.path.join(prefix, 'Dockerfile')] = \
        GeneratedFile(b''.join(content))

    writer = HashingWriter(out_file)
    create_tar_gz_from_files(writer, archive_files, '%s.tar.gz' % prefix)
    return writer.hexdigest()

def create_context_tar(topsrcdir, context_dir, out_path, prefix):
    """Create a context tarball.

    A directory ``context_dir`` containing a Dockerfile will be assembled
    into a gzipped tar file at ``out_path``. Files inside the archive will be
    prefixed by directory ``prefix``.

    We also scan the source Dockerfile for special syntax that influences
    context generation.

    If a line in the Dockerfile has the form ``# %include <path>``, the
    relative path specified on that line will be matched against files in the
    source repository and added to the context under the path ``topsrcdir/``.
    If an entry is a directory, we add all files under that directory.

    Returns the SHA-256 hex digest of the created archive.
    """
    archive_files = {}

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            rel = source_path[len(context_dir) + 1:]
            archive_path = os.path.join(prefix, rel)
            archive_files[archive_path] = source_path

    # Parse Dockerfile for special syntax of extra files to include.
    with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
        for line in fh:
            line = line.rstrip()
            if not line.startswith('# %include'):
                continue

            p = line[len('# %include '):].strip()
            if os.path.isabs(p):
                raise Exception('extra include path cannot be absolute: %s' % p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            if not fs_path.startswith(topsrcdir):
                raise Exception('extra include path outside topsrcdir: %s' % p)

            if not os.path.exists(fs_path):
                raise Exception('extra include path does not exist: %s' % p)

            if os.path.isdir(fs_path):
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        archive_path = os.path.join(prefix, 'topsrcdir', p, f)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join(prefix, 'topsrcdir', p)
                archive_files[archive_path] = fs_path

    with open(out_path, 'wb') as fh:
        create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)

    h = hashlib.sha256()
    with open(out_path, 'rb') as fh:
        while True:
            data = fh.read(32768)
            if not data:
                break
            h.update(data)
    return h.hexdigest()

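# For illustration, the `# %include` syntax that create_context_tar parses,
# with hypothetical paths (not taken from the source). A Dockerfile in the
# context directory might read:
#
#   FROM ubuntu:20.04
#   # %include python/mozbuild/mozbuild/action/tooltool.py
#   ADD topsrcdir/python/mozbuild/mozbuild/action/tooltool.py /setup/tooltool.py
#
# The include line pulls the named file from the source checkout into the
# context under topsrcdir/, so the ADD instruction can reference it. A call
# could then look like this (all paths assumed for the example):
digest = create_context_tar(
    topsrcdir='/home/user/mozilla-central',
    context_dir='/home/user/mozilla-central/taskcluster/docker/base',
    out_path='/tmp/base-context.tar.gz',
    prefix='base',
)
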
def create_context_tar(topsrcdir, context_dir, out_path, prefix, args=None):
    """Create a context tarball.

    A directory ``context_dir`` containing a Dockerfile will be assembled
    into a gzipped tar file at ``out_path``. Files inside the archive will be
    prefixed by directory ``prefix``.

    We also scan the source Dockerfile for special syntax that influences
    context generation.

    If a line in the Dockerfile has the form ``# %include <path>``, the
    relative path specified on that line will be matched against files in the
    source repository and added to the context under the path ``topsrcdir/``.
    If an entry is a directory, we add all files under that directory.

    If a line in the Dockerfile has the form ``# %ARG <name>``, occurrences
    of the string ``$<name>`` in subsequent lines are replaced with the value
    found in the ``args`` argument. Exception: this doesn't apply to VOLUME
    definitions.

    Returns the SHA-256 hex digest of the created archive.
    """
    archive_files = {}
    replace = []

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            rel = source_path[len(context_dir) + 1:]
            archive_path = os.path.join(prefix, rel)
            archive_files[archive_path] = source_path

    # Parse Dockerfile for special syntax of extra files to include.
    content = []
    with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
        for line in fh:
            if line.startswith('# %ARG'):
                p = line[len('# %ARG '):].strip()
                if not args or p not in args:
                    raise Exception('missing argument: {}'.format(p))
                replace.append((re.compile(r'\${}\b'.format(p)),
                                args[p].encode('ascii')))
                continue

            for regexp, s in replace:
                line = re.sub(regexp, s, line)

            content.append(line)

            if not line.startswith('# %include'):
                continue

            p = line[len('# %include '):].strip()
            if os.path.isabs(p):
                raise Exception('extra include path cannot be absolute: %s' % p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            if not fs_path.startswith(topsrcdir):
                raise Exception('extra include path outside topsrcdir: %s' % p)

            if not os.path.exists(fs_path):
                raise Exception('extra include path does not exist: %s' % p)

            if os.path.isdir(fs_path):
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        rel = source_path[len(fs_path) + 1:]
                        archive_path = os.path.join(prefix, 'topsrcdir', p, rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join(prefix, 'topsrcdir', p)
                archive_files[archive_path] = fs_path

    archive_files[os.path.join(prefix, 'Dockerfile')] = \
        GeneratedFile(b''.join(content))

    with open(out_path, 'wb') as fh:
        create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)

    h = hashlib.sha256()
    with open(out_path, 'rb') as fh:
        while True:
            data = fh.read(32768)
            if not data:
                break
            h.update(data)
    return h.hexdigest()

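# Hedged sketch of the `# %ARG` substitution this revision adds. With a
# Dockerfile containing (hypothetical contents):
#
#   # %ARG DOCKER_IMAGE_PARENT
#   FROM $DOCKER_IMAGE_PARENT
#
# the `# %ARG` line declares a required key in `args`, and later occurrences
# of $DOCKER_IMAGE_PARENT are rewritten before the Dockerfile is stored in
# the archive as a GeneratedFile. The variables and the image tag below are
# placeholders for the example:
digest = create_context_tar(
    topsrcdir, context_dir, out_path, prefix,
    args={'DOCKER_IMAGE_PARENT': 'debian10-base:latest'},
)
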
def stream_context_tar(topsrcdir, context_dir, out_file, image_name, args):
    """Like create_context_tar, but streams the tar file to the `out_file`
    file object."""
    archive_files = {}
    replace = []
    content = []

    context_dir = os.path.join(topsrcdir, context_dir)

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            archive_path = source_path[len(context_dir) + 1:]
            archive_files[archive_path] = source_path

    # Parse Dockerfile for special syntax of extra files to include.
    with io.open(os.path.join(context_dir, "Dockerfile"), "r") as fh:
        for line in fh:
            if line.startswith("# %ARG"):
                p = line[len("# %ARG "):].strip()
                if not args or p not in args:
                    raise Exception("missing argument: {}".format(p))
                replace.append((re.compile(r"\${}\b".format(p)), args[p]))
                continue

            for regexp, s in replace:
                line = re.sub(regexp, s, line)

            content.append(line)

            if not line.startswith("# %include"):
                continue

            p = line[len("# %include "):].strip()
            if os.path.isabs(p):
                raise Exception("extra include path cannot be absolute: %s" % p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            if not fs_path.startswith(topsrcdir):
                raise Exception("extra include path outside topsrcdir: %s" % p)

            if not os.path.exists(fs_path):
                raise Exception("extra include path does not exist: %s" % p)

            if os.path.isdir(fs_path):
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        rel = source_path[len(fs_path) + 1:]
                        archive_path = os.path.join("topsrcdir", p, rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join("topsrcdir", p)
                archive_files[archive_path] = fs_path

    archive_files["Dockerfile"] = GeneratedFile(
        b"".join(six.ensure_binary(s) for s in content))

    writer = HashingWriter(out_file)
    create_tar_gz_from_files(writer, archive_files,
                             "{}.tar".format(image_name))
    return writer.hexdigest()