Esempio n. 1
0
def create_tarball(filename, root):
    """Create a tar.gz archive of docs in a directory.

    The member listing comes from ``distribution_files(root)``; the gzip
    header records the archive's own base name.
    """
    doc_files = dict(distribution_files(root))
    gzip_name = os.path.basename(filename)

    with open(filename, 'wb') as out:
        create_tar_gz_from_files(
            out,
            doc_files,
            filename=gzip_name,
            compresslevel=6,
        )
Esempio n. 2
0
def create_tarball(filename, root):
    """Create a tar.gz archive of docs in a directory."""
    # Collect the docs to archive, keyed by their in-archive path.
    archive_contents = dict(distribution_files(root))
    base = os.path.basename(filename)

    with open(filename, 'wb') as out_fh:
        create_tar_gz_from_files(out_fh, archive_contents, filename=base,
                                 compresslevel=6)
Esempio n. 3
0
def stream_context_tar(topsrcdir, context_dir, out_file, image_name, args):
    """Like create_context_tar, but streams the tar file to the `out_file` file
    object.

    ``context_dir`` is resolved relative to ``topsrcdir``. Files in the
    context directory are collected, ``# %include <path>`` directives in the
    Dockerfile pull extra files from the source tree under ``topsrcdir/``,
    and a gzipped tar named ``<image_name>.tar`` is written through a
    ``HashingWriter``. Returns the hex digest of the bytes written.
    """
    archive_files = {}
    content = []

    context_dir = os.path.join(topsrcdir, context_dir)

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            # Archive paths are relative to the context directory.
            archive_path = source_path[len(context_dir) + 1:]
            archive_files[archive_path] = source_path

    # Parse Dockerfile for special syntax of extra files to include.
    with open(os.path.join(context_dir, "Dockerfile"), "r") as fh:
        for line in fh:
            content.append(line)

            if not line.startswith("# %include"):
                continue

            p = line[len("# %include "):].strip()
            if os.path.isabs(p):
                raise Exception("extra include path cannot be absolute: %s" %
                                p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits. Compare against the
            # prefix *including* the path separator so a sibling directory
            # such as `<topsrcdir>-evil` cannot pass a bare startswith().
            if not fs_path.startswith(os.path.join(topsrcdir, "")):
                raise Exception("extra include path outside topsrcdir: %s" % p)

            if not os.path.exists(fs_path):
                raise Exception("extra include path does not exist: %s" % p)

            if os.path.isdir(fs_path):
                # Walk included directories recursively, preserving their
                # internal layout relative to the include root.
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        rel = source_path[len(fs_path) + 1:]
                        archive_path = os.path.join("topsrcdir", p, rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join("topsrcdir", p)
                archive_files[archive_path] = fs_path

    # The Dockerfile content is archived from memory, encoded once.
    archive_files["Dockerfile"] = GeneratedFile(
        "".join(content).encode("utf-8"))

    writer = HashingWriter(out_file)
    create_tar_gz_from_files(writer, archive_files, f"{image_name}.tar")
    return writer.hexdigest()
Esempio n. 4
0
    def test_tar_gz_name(self):
        """Archiving with an explicit gzip name and compresslevel=1 must
        produce a byte-stable archive with the known digest."""
        workdir = tempfile.mkdtemp()
        try:
            created = self._create_files(workdir)

            target = os.path.join(workdir, 'test.tar.gz')
            with open(target, 'wb') as sink:
                create_tar_gz_from_files(sink, created, filename='foobar',
                                         compresslevel=1)

            expected = 'fd099f96480cc1100f37baa8e89a6b820dbbcbd3'
            self.assertEqual(file_hash(target), expected)

            with tarfile.open(target, 'r:gz') as tf:
                self._verify_basic_tarfile(tf)

        finally:
            shutil.rmtree(workdir)
Esempio n. 5
0
    def test_create_tar_gz_basic(self):
        """Archiving with default options must be byte-stable."""
        workdir = tempfile.mkdtemp()
        try:
            created = self._create_files(workdir)

            target = os.path.join(workdir, 'test.tar.gz')
            with open(target, 'wb') as sink:
                create_tar_gz_from_files(sink, created)

            expected = 'acb602239c1aeb625da5e69336775609516d60f5'
            self.assertEqual(file_hash(target), expected)

            with tarfile.open(target, 'r:gz') as tf:
                self._verify_basic_tarfile(tf)

        finally:
            shutil.rmtree(workdir)
Esempio n. 6
0
    def test_tar_gz_name(self):
        """Explicit gzip name + compresslevel=1 must give the known digest."""
        tmpdir = tempfile.mkdtemp()
        try:
            test_files = self._create_files(tmpdir)

            out_path = os.path.join(tmpdir, 'test.tar.gz')
            with open(out_path, 'wb') as out:
                create_tar_gz_from_files(out, test_files, filename='foobar',
                                         compresslevel=1)

            self.assertEqual(file_hash(out_path),
                             '1cc8b96f0262350977c2e9d61f40a1fa76f35c52')

            with tarfile.open(out_path, 'r:gz') as archive:
                self._verify_basic_tarfile(archive)

        finally:
            shutil.rmtree(tmpdir)
Esempio n. 7
0
    def test_create_tar_gz_basic(self):
        """Default-option archiving must give the known digest."""
        tmpdir = tempfile.mkdtemp()
        try:
            test_files = self._create_files(tmpdir)

            out_path = os.path.join(tmpdir, 'test.tar.gz')
            with open(out_path, 'wb') as out:
                create_tar_gz_from_files(out, test_files)

            self.assertEqual(file_hash(out_path),
                             '7c4da5adc5088cdf00911d5daf9a67b15de714b7')

            with tarfile.open(out_path, 'r:gz') as archive:
                self._verify_basic_tarfile(archive)

        finally:
            shutil.rmtree(tmpdir)
Esempio n. 8
0
    def test_tar_gz_name(self):
        """Explicit gzip name + compresslevel=1 must be byte-stable."""
        scratch = tempfile.mkdtemp()
        try:
            inputs = self._create_files(scratch)
            archive_path = os.path.join(scratch, 'test.tar.gz')

            with open(archive_path, 'wb') as dest:
                create_tar_gz_from_files(dest, inputs, filename='foobar',
                                         compresslevel=1)

            self.assertEqual(
                file_hash(archive_path),
                '1cc8b96f0262350977c2e9d61f40a1fa76f35c52')

            with tarfile.open(archive_path, 'r:gz') as opened:
                self._verify_basic_tarfile(opened)

        finally:
            shutil.rmtree(scratch)
Esempio n. 9
0
    def test_create_tar_gz_basic(self):
        """Default-option archiving must be byte-stable."""
        scratch = tempfile.mkdtemp()
        try:
            inputs = self._create_files(scratch)
            archive_path = os.path.join(scratch, 'test.tar.gz')

            with open(archive_path, 'wb') as dest:
                create_tar_gz_from_files(dest, inputs)

            self.assertEqual(
                file_hash(archive_path),
                '7c4da5adc5088cdf00911d5daf9a67b15de714b7')

            with tarfile.open(archive_path, 'r:gz') as opened:
                self._verify_basic_tarfile(opened)

        finally:
            shutil.rmtree(scratch)
Esempio n. 10
0
    def test_create_tar_gz_basic(self):
        """Default archiving must produce the known, stable digest."""
        workdir = tempfile.mkdtemp()
        try:
            created = self._create_files(workdir)

            target = os.path.join(workdir, 'test.tar.gz')
            with open(target, 'wb') as sink:
                create_tar_gz_from_files(sink, created)

            expected = 'acb602239c1aeb625da5e69336775609516d60f5'
            self.assertEqual(file_hash(target), expected)

            with tarfile.open(target, 'r:gz') as tf:
                self._verify_basic_tarfile(tf)

        finally:
            shutil.rmtree(workdir)
Esempio n. 11
0
    def test_tar_gz_name(self):
        """Explicit gzip name + compresslevel=1 must produce the known
        digest."""
        workdir = tempfile.mkdtemp()
        try:
            created = self._create_files(workdir)

            target = os.path.join(workdir, 'test.tar.gz')
            with open(target, 'wb') as sink:
                create_tar_gz_from_files(sink, created, filename='foobar',
                                         compresslevel=1)

            expected = 'fd099f96480cc1100f37baa8e89a6b820dbbcbd3'
            self.assertEqual(file_hash(target), expected)

            with tarfile.open(target, 'r:gz') as tf:
                self._verify_basic_tarfile(tf)

        finally:
            shutil.rmtree(workdir)
Esempio n. 12
0
def main(argv):
    """Produce a test archive named by ``argv``.

    Expects two positional arguments: the archive key to generate and the
    output path, which must end in ``.tar.gz`` or ``.zip``. Prints a
    one-line summary of what was written.
    """
    parser = argparse.ArgumentParser(description='Produce test archives')
    parser.add_argument('archive', help='Which archive to generate')
    parser.add_argument('outputfile', help='File to write output to')

    args = parser.parse_args(argv)

    out_file = args.outputfile
    if not out_file.endswith(('.tar.gz', '.zip')):
        raise Exception('expected tar.gz or zip output file')

    file_count = 0
    t_start = time.time()
    ensureParentDir(out_file)
    res = find_files(args.archive)
    with open(out_file, 'wb') as fh:
        # Experimentation revealed that level 5 is significantly faster and has
        # marginally larger sizes than higher values and is the sweet spot
        # for optimal compression. Read the detailed commit message that
        # introduced this for raw numbers.
        if out_file.endswith('.tar.gz'):
            files = dict(res)
            create_tar_gz_from_files(fh, files, compresslevel=5)
            file_count = len(files)
        elif out_file.endswith('.zip'):
            with JarWriter(fileobj=fh, optimize=False,
                           compress_level=5) as writer:
                for p, f in res:
                    writer.add(p.encode('utf-8'),
                               f.read(),
                               mode=f.mode,
                               skip_duplicates=True)
                    file_count += 1
        else:
            # Unreachable given the extension check above; kept defensively.
            raise Exception('unhandled file extension: %s' % out_file)

    duration = time.time() - t_start
    # Use the validated local alias consistently rather than reaching back
    # into args.outputfile (same value; avoids divergence on future edits).
    zip_size = os.path.getsize(out_file)
    basename = os.path.basename(out_file)
    print('Wrote %d files in %d bytes to %s in %.2fs' %
          (file_count, zip_size, basename, duration))
Esempio n. 13
0
    def test_tar_gz_name(self):
        """An explicit gzip `filename` must give a byte-stable archive.

        The expected digest depends on the interpreter version (the test
        asserts one value through Python 3.8 and another afterwards).
        """
        workdir = tempfile.mkdtemp()
        try:
            created = self._create_files(workdir)

            target = os.path.join(workdir, "test.tar.gz")
            with open(target, "wb") as sink:
                create_tar_gz_from_files(sink, created, filename="foobar")

            if sys.version_info.major == 3 and sys.version_info.minor <= 8:
                expected = "721e00083c17d16df2edbddf40136298c06d0c49"
            else:
                expected = "059916c8e6b6f22be774dc56aabb3819460fee53"
            self.assertEqual(file_hash(target), expected)

            with tarfile.open(target, "r:gz") as tf:
                self._verify_basic_tarfile(tf)

        finally:
            shutil.rmtree(workdir)
Esempio n. 14
0
    def test_create_tar_gz_basic(self):
        """Default archiving must be byte-stable.

        The expected digest depends on the interpreter version (one value
        through Python 3.8, another afterwards).
        """
        workdir = tempfile.mkdtemp()
        try:
            created = self._create_files(workdir)

            target = os.path.join(workdir, "test.tar.gz")
            with open(target, "wb") as sink:
                create_tar_gz_from_files(sink, created)

            if sys.version_info.major == 3 and sys.version_info.minor <= 8:
                expected = "7c4da5adc5088cdf00911d5daf9a67b15de714b7"
            else:
                expected = "766117873af9754915ee93c56243616d000d7e5e"
            self.assertEqual(file_hash(target), expected)

            with tarfile.open(target, "r:gz") as tf:
                self._verify_basic_tarfile(tf)

        finally:
            shutil.rmtree(workdir)
Esempio n. 15
0
def main(argv):
    """Produce a test archive named by ``argv``.

    Expects two positional arguments: the archive key to generate and the
    output path, which must end in ``.tar.gz`` or ``.zip``. Prints a
    one-line summary of what was written.
    """
    parser = argparse.ArgumentParser(
        description='Produce test archives')
    parser.add_argument('archive', help='Which archive to generate')
    parser.add_argument('outputfile', help='File to write output to')

    args = parser.parse_args(argv)

    out_file = args.outputfile
    if not out_file.endswith(('.tar.gz', '.zip')):
        raise Exception('expected tar.gz or zip output file')

    file_count = 0
    t_start = time.time()
    ensureParentDir(out_file)
    res = find_files(args.archive)
    with open(out_file, 'wb') as fh:
        # Experimentation revealed that level 5 is significantly faster and has
        # marginally larger sizes than higher values and is the sweet spot
        # for optimal compression. Read the detailed commit message that
        # introduced this for raw numbers.
        if out_file.endswith('.tar.gz'):
            files = dict(res)
            create_tar_gz_from_files(fh, files, compresslevel=5)
            file_count = len(files)
        elif out_file.endswith('.zip'):
            with JarWriter(fileobj=fh, optimize=False, compress_level=5) as writer:
                for p, f in res:
                    writer.add(p.encode('utf-8'), f.read(), mode=f.mode,
                               skip_duplicates=True)
                    file_count += 1
        else:
            # Unreachable given the extension check above; kept defensively.
            raise Exception('unhandled file extension: %s' % out_file)

    duration = time.time() - t_start
    # Use the validated local alias consistently rather than reaching back
    # into args.outputfile (same value; avoids divergence on future edits).
    zip_size = os.path.getsize(out_file)
    basename = os.path.basename(out_file)
    print('Wrote %d files in %d bytes to %s in %.2fs' % (
          file_count, zip_size, basename, duration))
Esempio n. 16
0
def main(argv):
    """Write a tar.gz archive of the build's generated sources.

    Takes one positional argument: the output file path. Entries whose
    resolved path falls outside the object directory are skipped with a
    warning on stderr.
    """
    parser = argparse.ArgumentParser(
        description='Produce archive of generated sources')
    parser.add_argument('outputfile', help='File to write output to')
    args = parser.parse_args(argv)

    objdir_abspath = mozpath.abspath(buildconfig.topobjdir)

    def is_valid_entry(entry):
        # Entries are (archive_path, file) pairs; the file side may be a
        # BaseFile wrapper or a plain path string.
        if isinstance(entry[1], BaseFile):
            entry_abspath = mozpath.abspath(entry[1].path)
        else:
            entry_abspath = mozpath.abspath(entry[1])
        # Require containment in the objdir proper: include the separator in
        # the prefix so a sibling directory such as `<objdir>-other` is not
        # accepted (mozpath paths use '/' separators).
        if not entry_abspath.startswith(objdir_abspath + '/'):
            print("Warning: omitting generated source [%s] from archive" % entry_abspath,
                  file=sys.stderr)
            return False
        return True

    files = dict(filter(is_valid_entry, get_generated_sources()))
    with open(args.outputfile, 'wb') as fh:
        create_tar_gz_from_files(fh, files, compresslevel=5)
Esempio n. 17
0
def stream_context_tar(topsrcdir, context_dir, out_file, prefix, args=None):
    """Like create_context_tar, but streams the tar file to the `out_file` file
    object.

    Collects the files under ``context_dir`` (keyed by archive path under
    ``prefix``), expands ``# %ARG`` and ``# %include`` directives found in
    the Dockerfile, and writes a gzipped tar through a ``HashingWriter``.
    Returns the writer's hex digest of the bytes written.
    """
    # archive path -> source path (or GeneratedFile) of everything to pack.
    archive_files = {}
    # (compiled regex, replacement bytes) pairs collected from %ARG lines.
    replace = []

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            rel = source_path[len(context_dir) + 1:]
            archive_path = os.path.join(prefix, rel)
            archive_files[archive_path] = source_path

    # Parse Dockerfile for special syntax of extra files to include.
    # NOTE(review): the file is opened in binary mode but str literals are
    # used with startswith()/formatting — Python 2 semantics; on Python 3
    # this would raise TypeError. Confirm the target interpreter.
    content = []
    with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
        for line in fh:
            if line.startswith('# %ARG'):
                # `# %ARG NAME` declares a substitution: later occurrences
                # of $NAME are replaced with args[NAME].
                p = line[len('# %ARG '):].strip()
                if not args or p not in args:
                    raise Exception('missing argument: {}'.format(p))
                replace.append((re.compile(r'\${}\b'.format(p)),
                                args[p].encode('ascii')))
                continue

            # Apply every substitution declared so far to this line.
            for regexp, s in replace:
                line = re.sub(regexp, s, line)

            content.append(line)

            if not line.startswith('# %include'):
                continue

            # `# %include <path>` pulls extra files from the source tree
            # into the context under topsrcdir/.
            p = line[len('# %include '):].strip()
            if os.path.isabs(p):
                raise Exception('extra include path cannot be absolute: %s' % p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            # NOTE(review): a bare prefix startswith() also matches sibling
            # paths such as `<topsrcdir>-other`; confirm acceptability.
            if not fs_path.startswith(topsrcdir):
                raise Exception('extra include path outside topsrcdir: %s' % p)

            if not os.path.exists(fs_path):
                raise Exception('extra include path does not exist: %s' % p)

            if os.path.isdir(fs_path):
                # Walk included directories recursively, preserving their
                # internal layout relative to the include root.
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        rel = source_path[len(fs_path) + 1:]
                        archive_path = os.path.join(prefix, 'topsrcdir', p, rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join(prefix, 'topsrcdir', p)
                archive_files[archive_path] = fs_path

    # The substituted Dockerfile content replaces the on-disk copy.
    archive_files[os.path.join(prefix, 'Dockerfile')] = \
        GeneratedFile(b''.join(content))

    writer = HashingWriter(out_file)
    create_tar_gz_from_files(writer, archive_files, '%s.tar.gz' % prefix)
    return writer.hexdigest()
Esempio n. 18
0
def stream_context_tar(topsrcdir, context_dir, out_file, prefix, args=None):
    """Like create_context_tar, but streams the tar file to the `out_file` file
    object.

    ``context_dir`` is resolved relative to ``topsrcdir``. Files are keyed
    by archive path under ``prefix``; ``# %ARG`` and ``# %include``
    directives in the Dockerfile are expanded. Returns the HashingWriter's
    hex digest of the bytes written.
    """
    # archive path -> source path (or GeneratedFile) of everything to pack.
    archive_files = {}
    # (compiled regex, replacement bytes) pairs collected from %ARG lines.
    replace = []
    content = []

    context_dir = os.path.join(topsrcdir, context_dir)

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            rel = source_path[len(context_dir) + 1:]
            archive_path = os.path.join(prefix, rel)
            archive_files[archive_path] = source_path

    # Parse Dockerfile for special syntax of extra files to include.
    # NOTE(review): the file is opened in binary mode but str literals are
    # used with startswith()/formatting — Python 2 semantics; on Python 3
    # this would raise TypeError. Confirm the target interpreter.
    with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
        for line in fh:
            if line.startswith('# %ARG'):
                # `# %ARG NAME` declares a substitution: later occurrences
                # of $NAME are replaced with args[NAME].
                p = line[len('# %ARG '):].strip()
                if not args or p not in args:
                    raise Exception('missing argument: {}'.format(p))
                replace.append((re.compile(r'\${}\b'.format(p)),
                                args[p].encode('ascii')))
                continue

            # Apply every substitution declared so far to this line.
            for regexp, s in replace:
                line = re.sub(regexp, s, line)

            content.append(line)

            if not line.startswith('# %include'):
                continue

            # `# %include <path>` pulls extra files from the source tree
            # into the context under topsrcdir/.
            p = line[len('# %include '):].strip()
            if os.path.isabs(p):
                raise Exception('extra include path cannot be absolute: %s' % p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            # NOTE(review): a bare prefix startswith() also matches sibling
            # paths such as `<topsrcdir>-other`; confirm acceptability.
            if not fs_path.startswith(topsrcdir):
                raise Exception('extra include path outside topsrcdir: %s' % p)

            if not os.path.exists(fs_path):
                raise Exception('extra include path does not exist: %s' % p)

            if os.path.isdir(fs_path):
                # Walk included directories recursively, preserving their
                # internal layout relative to the include root.
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        rel = source_path[len(fs_path) + 1:]
                        archive_path = os.path.join(prefix, 'topsrcdir', p, rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join(prefix, 'topsrcdir', p)
                archive_files[archive_path] = fs_path

    # The substituted Dockerfile content replaces the on-disk copy.
    archive_files[os.path.join(prefix, 'Dockerfile')] = \
        GeneratedFile(b''.join(content))

    writer = HashingWriter(out_file)
    create_tar_gz_from_files(writer, archive_files, '%s.tar.gz' % prefix)
    return writer.hexdigest()
Esempio n. 19
0
def create_context_tar(topsrcdir, context_dir, out_path, prefix):
    """Create a context tarball.

    A directory ``context_dir`` containing a Dockerfile will be assembled into
    a gzipped tar file at ``out_path``. Files inside the archive will be
    prefixed by directory ``prefix``.

    We also scan the source Dockerfile for special syntax that influences
    context generation.

    If a line in the Dockerfile has the form ``# %include <path>``,
    the relative path specified on that line will be matched against
    files in the source repository and added to the context under the
    path ``topsrcdir/``. If an entry is a directory, we add all files
    under that directory.

    Returns the SHA-256 hex digest of the created archive.
    """
    archive_files = {}

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            rel = source_path[len(context_dir) + 1:]
            archive_path = os.path.join(prefix, rel)
            archive_files[archive_path] = source_path

    # Parse Dockerfile for special syntax of extra files to include.
    # NOTE(review): the file is opened in binary mode but str literals are
    # used with startswith()/slicing — Python 2 semantics; on Python 3 this
    # would raise TypeError. Confirm the target interpreter.
    with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
        for line in fh:
            line = line.rstrip()
            if not line.startswith('# %include'):
                continue

            p = line[len('# %include '):].strip()
            if os.path.isabs(p):
                raise Exception('extra include path cannot be absolute: %s' % p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            if not fs_path.startswith(topsrcdir):
                raise Exception('extra include path outside topsrcdir: %s' % p)

            if not os.path.exists(fs_path):
                raise Exception('extra include path does not exist: %s' % p)

            if os.path.isdir(fs_path):
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        # Preserve the path relative to the included
                        # directory; archiving by bare filename would
                        # flatten nested subdirectories and let files with
                        # the same basename overwrite each other.
                        rel = source_path[len(fs_path) + 1:]
                        archive_path = os.path.join(prefix, 'topsrcdir', p,
                                                    rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join(prefix, 'topsrcdir', p)
                archive_files[archive_path] = fs_path

    with open(out_path, 'wb') as fh:
        create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)

    # Hash the finished archive in 32 KiB chunks.
    h = hashlib.sha256()
    with open(out_path, 'rb') as fh:
        while True:
            data = fh.read(32768)
            if not data:
                break
            h.update(data)
    return h.hexdigest()
Esempio n. 20
0
def create_context_tar(topsrcdir, context_dir, out_path, prefix):
    """Create a context tarball.

    A directory ``context_dir`` containing a Dockerfile will be assembled into
    a gzipped tar file at ``out_path``. Files inside the archive will be
    prefixed by directory ``prefix``.

    We also scan the source Dockerfile for special syntax that influences
    context generation.

    If a line in the Dockerfile has the form ``# %include <path>``,
    the relative path specified on that line will be matched against
    files in the source repository and added to the context under the
    path ``topsrcdir/``. If an entry is a directory, we add all files
    under that directory.

    Returns the SHA-256 hex digest of the created archive.
    """
    archive_files = {}

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            rel = source_path[len(context_dir) + 1:]
            archive_path = os.path.join(prefix, rel)
            archive_files[archive_path] = source_path

    # Parse Dockerfile for special syntax of extra files to include.
    # NOTE(review): the file is opened in binary mode but str literals are
    # used with startswith()/slicing — Python 2 semantics; on Python 3 this
    # would raise TypeError. Confirm the target interpreter.
    with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
        for line in fh:
            line = line.rstrip()
            if not line.startswith('# %include'):
                continue

            p = line[len('# %include '):].strip()
            if os.path.isabs(p):
                raise Exception('extra include path cannot be absolute: %s' %
                                p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            if not fs_path.startswith(topsrcdir):
                raise Exception('extra include path outside topsrcdir: %s' % p)

            if not os.path.exists(fs_path):
                raise Exception('extra include path does not exist: %s' % p)

            if os.path.isdir(fs_path):
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        # Preserve the path relative to the included
                        # directory; archiving by bare filename would
                        # flatten nested subdirectories and let files with
                        # the same basename overwrite each other.
                        rel = source_path[len(fs_path) + 1:]
                        archive_path = os.path.join(prefix, 'topsrcdir', p,
                                                    rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join(prefix, 'topsrcdir', p)
                archive_files[archive_path] = fs_path

    with open(out_path, 'wb') as fh:
        create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)

    # Hash the finished archive in 32 KiB chunks.
    h = hashlib.sha256()
    with open(out_path, 'rb') as fh:
        while True:
            data = fh.read(32768)
            if not data:
                break
            h.update(data)
    return h.hexdigest()
Esempio n. 21
0
def create_context_tar(topsrcdir, context_dir, out_path, prefix, args=None):
    """Create a context tarball.

    A directory ``context_dir`` containing a Dockerfile will be assembled into
    a gzipped tar file at ``out_path``. Files inside the archive will be
    prefixed by directory ``prefix``.

    We also scan the source Dockerfile for special syntax that influences
    context generation.

    If a line in the Dockerfile has the form ``# %include <path>``,
    the relative path specified on that line will be matched against
    files in the source repository and added to the context under the
    path ``topsrcdir/``. If an entry is a directory, we add all files
    under that directory.

    If a line in the Dockerfile has the form ``# %ARG <name>``, occurrences of
    the string ``$<name>`` in subsequent lines are replaced with the value
    found in the ``args`` argument. Exception: this doesn't apply to VOLUME
    definitions.

    Returns the SHA-256 hex digest of the created archive.
    """
    # archive path -> source path (or GeneratedFile) of everything to pack.
    archive_files = {}
    # (compiled regex, replacement bytes) pairs collected from %ARG lines.
    replace = []

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            rel = source_path[len(context_dir) + 1:]
            archive_path = os.path.join(prefix, rel)
            archive_files[archive_path] = source_path

    # Parse Dockerfile for special syntax of extra files to include.
    # NOTE(review): the file is opened in binary mode but str literals are
    # used with startswith()/formatting — Python 2 semantics; on Python 3
    # this would raise TypeError. Confirm the target interpreter.
    content = []
    with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
        for line in fh:
            if line.startswith('# %ARG'):
                p = line[len('# %ARG '):].strip()
                if not args or p not in args:
                    raise Exception('missing argument: {}'.format(p))
                replace.append(
                    (re.compile(r'\${}\b'.format(p)), args[p].encode('ascii')))
                continue

            # Apply every substitution declared so far to this line.
            for regexp, s in replace:
                line = re.sub(regexp, s, line)

            content.append(line)

            if not line.startswith('# %include'):
                continue

            p = line[len('# %include '):].strip()
            if os.path.isabs(p):
                raise Exception('extra include path cannot be absolute: %s' %
                                p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            # NOTE(review): a bare prefix startswith() also matches sibling
            # paths such as `<topsrcdir>-other`; confirm acceptability.
            if not fs_path.startswith(topsrcdir):
                raise Exception('extra include path outside topsrcdir: %s' % p)

            if not os.path.exists(fs_path):
                raise Exception('extra include path does not exist: %s' % p)

            if os.path.isdir(fs_path):
                # Walk included directories recursively, preserving their
                # internal layout relative to the include root.
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        rel = source_path[len(fs_path) + 1:]
                        archive_path = os.path.join(prefix, 'topsrcdir', p,
                                                    rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join(prefix, 'topsrcdir', p)
                archive_files[archive_path] = fs_path

    # The substituted Dockerfile content replaces the on-disk copy.
    archive_files[os.path.join(prefix, 'Dockerfile')] = \
        GeneratedFile(b''.join(content))

    with open(out_path, 'wb') as fh:
        create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)

    # Hash the finished archive in 32 KiB chunks.
    h = hashlib.sha256()
    with open(out_path, 'rb') as fh:
        while True:
            data = fh.read(32768)
            if not data:
                break
            h.update(data)
    return h.hexdigest()
Esempio n. 22
0
def stream_context_tar(topsrcdir, context_dir, out_file, image_name, args):
    """Like create_context_tar, but streams the tar file to the `out_file` file
    object.

    ``context_dir`` is resolved relative to ``topsrcdir``. The Dockerfile's
    ``# %ARG`` and ``# %include`` directives are expanded, and the archive
    (named ``<image_name>.tar``) is written through a ``HashingWriter``
    whose hex digest is returned.
    """
    # archive path -> source path (or GeneratedFile) of everything to pack.
    archive_files = {}
    # (compiled regex, replacement string) pairs collected from %ARG lines.
    replace = []
    content = []

    context_dir = os.path.join(topsrcdir, context_dir)

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            # Archive paths are relative to the context directory.
            archive_path = source_path[len(context_dir) + 1:]
            archive_files[archive_path] = source_path

    # Parse Dockerfile for special syntax of extra files to include.
    with io.open(os.path.join(context_dir, "Dockerfile"), "r") as fh:
        for line in fh:
            if line.startswith("# %ARG"):
                # `# %ARG NAME` declares a substitution: later occurrences
                # of $NAME are replaced with args[NAME].
                p = line[len("# %ARG "):].strip()
                if not args or p not in args:
                    raise Exception("missing argument: {}".format(p))
                replace.append((re.compile(r"\${}\b".format(p)), args[p]))
                continue

            # Apply every substitution declared so far to this line.
            for regexp, s in replace:
                line = re.sub(regexp, s, line)

            content.append(line)

            if not line.startswith("# %include"):
                continue

            # `# %include <path>` pulls extra files from the source tree
            # into the context under topsrcdir/.
            p = line[len("# %include "):].strip()
            if os.path.isabs(p):
                raise Exception("extra include path cannot be absolute: %s" %
                                p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            # NOTE(review): a bare prefix startswith() also matches sibling
            # paths such as `<topsrcdir>-other`; confirm acceptability.
            if not fs_path.startswith(topsrcdir):
                raise Exception("extra include path outside topsrcdir: %s" % p)

            if not os.path.exists(fs_path):
                raise Exception("extra include path does not exist: %s" % p)

            if os.path.isdir(fs_path):
                # Walk included directories recursively, preserving their
                # internal layout relative to the include root.
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        rel = source_path[len(fs_path) + 1:]
                        archive_path = os.path.join("topsrcdir", p, rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join("topsrcdir", p)
                archive_files[archive_path] = fs_path

    # The substituted Dockerfile is archived from memory; lines read in
    # text mode are re-encoded to bytes.
    archive_files["Dockerfile"] = GeneratedFile(b"".join(
        six.ensure_binary(s) for s in content))

    writer = HashingWriter(out_file)
    create_tar_gz_from_files(writer, archive_files,
                             "{}.tar".format(image_name))
    return writer.hexdigest()