Ejemplo n.º 1
0
def pull(cli):
    """Pull the image named by cli.image_ref and unpack it.

    The download cache is always <cli.storage>/dlcache. If cli.image_dir is
    given, the image is unpacked there with an empty subdirectory; otherwise
    it goes under <cli.storage>/img and the subdirectory is passed as None
    so that it is inferred from the image reference.

    If cli.parse_only is set, print the verbose form of the parsed reference
    and exit with status 0 without pulling anything."""
    ch.dependencies_check()
    # Decide where everything lives.
    dlcache = cli.storage + "/dlcache"
    explicit_dir = (cli.image_dir is not None)
    if (explicit_dir):
        (unpack_dir, image_subdir) = (cli.image_dir, "")
    else:
        (unpack_dir, image_subdir) = (cli.storage + "/img", None)
    # Parse the reference; sometimes that is all we were asked to do.
    ref = ch.Image_Ref(cli.image_ref)
    if (cli.parse_only):
        print(ref.as_verbose_str)
        sys.exit(0)
    image = ch.Image(ref, dlcache, unpack_dir, image_subdir)
    # Report the plan. The destination is only interesting at the normal
    # log level when the user chose it explicitly.
    ch.INFO("pulling image:   %s" % image.ref)
    report_destination = ch.INFO if explicit_dir else ch.DEBUG
    report_destination("destination:     %s" % image.unpack_path)
    use_cache = not cli.no_cache
    ch.DEBUG("use cache:       %s" % use_cache)
    ch.DEBUG("download cache:  %s" % image.download_cache)
    ch.DEBUG("manifest:        %s" % image.manifest_path)
    # Do the actual work.
    image.pull_to_unpacked(use_cache=use_cache, last_layer=cli.last_layer)
    ch.INFO("done")
Ejemplo n.º 2
0
def config(img):
    """Return the fakeroot configuration that applies to image directory
    img, or None if none applies.

    Each entry of DEFAULT_CONFIGS carries a "match" pair (path relative to
    the image, regular expression) and a "config" dictionary. The first
    entry whose path is a regular file in the image and whose contents
    satisfy ch.grep_p() for the expression wins."""
    ch.DEBUG("fakeroot: checking configs: %s" % img)
    for candidate in DEFAULT_CONFIGS:
        (rel_path, pattern) = candidate["match"]
        target = "%s/%s" % (img, rel_path)
        name = candidate["config"]["name"]
        ch.DEBUG("fakeroot: checking %s: grep '%s' %s" %
                 (name, pattern, rel_path))
        # Skip candidates whose marker file is absent or does not match.
        if (not os.path.isfile(target)):
            continue
        if (not ch.grep_p(target, pattern)):
            continue
        ch.DEBUG("fakeroot: using config %s" % name)
        return candidate["config"]
    ch.DEBUG("fakeroot: no config found")
    return None
Ejemplo n.º 3
0
    def prepare(self):
        """Prepare self.image for pushing to self.dst_ref.

        Nothing is returned. The results are stored on self for the later
        push steps:

          self.layers    list of (hash, path) tuples, one per gzipped layer
                         tarball, in the order the tarballs were written
          self.config    config, JSON-encoded as a sequence of UTF-8 bytes
          self.manifest  manifest, JSON-encoded as a sequence of UTF-8 bytes

        There is not currently any support for re-using any previously
        prepared files already in the upload cache, because we don't yet have
        a way to know if these have changed until they are already built."""
        ch.mkdirs(ch.storage.upload_cache)
        tars_uc = self.image.tarballs_write(ch.storage.upload_cache)
        tars_c = list()
        config = self.config_new()
        manifest = self.manifest_new()
        # Prepare layers. Naming: "_uc" is the uncompressed tarball and "_c"
        # the gzipped one.
        for (i, tar_uc) in enumerate(tars_uc, start=1):
            ch.INFO("layer %d/%d: preparing" % (i, len(tars_uc)))
            path_uc = ch.storage.upload_cache // tar_uc
            # The config's diff IDs are taken from the *uncompressed*
            # tarball, so hash before compressing.
            hash_uc = ch.file_hash(path_uc)
            config["rootfs"]["diff_ids"].append("sha256:" + hash_uc)
            path_c = ch.file_gzip(path_uc, ["-9", "--no-name"])
            # The manifest describes the *compressed* tarball.
            hash_c = ch.file_hash(path_c)
            size_c = ch.file_size(path_c)
            tars_c.append((hash_c, path_c))
            manifest["layers"].append({
                "mediaType": ch.TYPE_LAYER,
                "size": size_c,
                "digest": "sha256:" + hash_c
            })
        # Prepare metadata. The config must be serialized first because the
        # manifest records the size and digest of the serialized config.
        ch.INFO("preparing metadata")
        config_bytes = json.dumps(config, indent=2).encode("UTF-8")
        config_hash = ch.bytes_hash(config_bytes)
        manifest["config"]["size"] = len(config_bytes)
        manifest["config"]["digest"] = "sha256:" + config_hash
        ch.DEBUG("config: %s\n%s" %
                 (config_hash, config_bytes.decode("UTF-8")))
        manifest_bytes = json.dumps(manifest, indent=2).encode("UTF-8")
        ch.DEBUG("manifest:\n%s" % manifest_bytes.decode("UTF-8"))
        # Store for the next steps.
        self.layers = tars_c
        self.config = config_bytes
        self.manifest = manifest_bytes
Ejemplo n.º 4
0
 def execute_(self):
     """Execute this FROM instruction, starting a new build stage.

     Advances the globals image_i and image_alias, registers the stage's
     image in images under its index (and alias, if any), pulls the base
     image if it is not already unpacked, copies it into the stage image,
     resets the build environment, and finally injects the fakeroot
     preparation unless cli.no_fakeroot is set."""
     global image_i, image_alias
     # Reject what we do not support yet; anything left over is invalid.
     if (self.options.pop("platform", False)):
         self.unsupported_yet_fatal("--platform", 778)
     self.options_assert_empty()
     # Move the image globals on to this stage.
     image_i += 1
     image_alias = self.alias
     last_i = image_ct - 1
     if (image_i > last_i):
         # More FROM instructions than were counted up front.
         ch.FATAL("expected %d stages but found at least %d" %
                  (image_ct, image_i + 1))
     elif (image_i == last_i):
         # Final stage gets the tag exactly as given.
         tag = cli.tag
     else:
         # Intermediate stage gets the stage index appended to the tag.
         tag = "%s/_stage%d" % (cli.tag, image_i)
     ref = ch.Image_Ref(tag)
     image = ch.Image(ref, cli.storage + "/dlcache", cli.storage + "/img")
     images[image_i] = image
     if (self.alias is not None):
         images[self.alias] = image
     ch.DEBUG("image path: %s" % image.unpack_path)
     # Building an image on top of itself makes no sense.
     if (str(image.ref) == str(self.base_ref)):
         ch.FATAL("output image ref same as FROM: %s" % self.base_ref)
     # Set up the base image, pulling it first if it is not unpacked yet.
     base = ch.Image(self.base_ref, image.download_cache, image.unpack_dir)
     self.base_image = base
     if (not os.path.isdir(base.unpack_path)):
         ch.DEBUG("image not found, pulling: %s" % base.unpack_path)
         base.pull_to_unpacked(fixup=True)
     image.copy_unpacked(base)
     env.reset()
     # Fakeroot preparation is optional.
     if (cli.no_fakeroot):
         return
     fakeroot.inject_first(image.unpack_path, env.env_build)
Ejemplo n.º 5
0
def inject_first(img, env):
    """Run the fakeroot first-time initialization commands in an image.

    img is the path of the unpacked image directory and env is the
    environment handed to ch.ch_run_modify() for each command.

    Does nothing if no fakeroot configuration matches the image (config()
    returns None) or if the marker file <img>/ch/fakeroot-first-run already
    exists. Otherwise each command line in the configuration's "first"
    list is run in the image via /bin/sh -c."""
    c = config(img)
    if (c is None):
        return
    # The marker path must be interpolated with the image directory;
    # testing the bare format string would look for a literal
    # "%s/ch/fakeroot-first-run" relative to the working directory and so
    # never detect an already-initialized image.
    if (os.path.exists("%s/ch/fakeroot-first-run" % img)):
        ch.DEBUG("fakeroot: already initialized")
        return
    ch.INFO("fakeroot: initializing for %s" % c["name"])
    for cl in c["first"]:
        ch.INFO("fakeroot: $ %s" % cl)
        args = ["/bin/sh", "-c", cl]
        ch.ch_run_modify(img, args, env)
Ejemplo n.º 6
0
 def copy_src_file(self, src, dst):
     """Copy the single file src to dst.

      src was named by COPY, either explicitly or through a wildcard, and
      may be a symlink (it is followed). dst is a canonical path: it either
      does not exist yet, or is a real (non-symlink) directory or regular
      file. When dst is a directory the file lands inside it under src's
      name; any directory creation has already been done by the caller.

      Only valid at the top level of a COPY instruction; this method must
      not be called recursively."""
     assert (os.path.isfile(src))
     # Same destination condition as described above, factored so the
     # symlink test is written once.
     assert (not os.path.exists(dst)
             or (not os.path.islink(dst)
                 and (os.path.isdir(dst) or os.path.isfile(dst))))
     ch.DEBUG("copying named file: %s -> %s" % (src, dst))
     ch.copy2(src, dst, follow_symlinks=True)
Ejemplo n.º 7
0
 def manifest_load(self):
    """Read and parse the manifest file at self.manifest_path, then set
       self.config_hash and self.layer_hashes from it.

       Schema versions 1 and 2 are accepted. Version 1 has no config blob,
       so self.config_hash becomes None, and its layer list is reversed
       before being stored."""
    path = self.manifest_path
    def bad_key(key):
       ch.FATAL("manifest: %s: no key: %s" % (path, key))
    # Slurp the file and decode the JSON.
    fp = ch.open_(path, "rt", encoding="UTF-8")
    text = ch.ossafe(fp.read, "can't read: %s" % path)
    ch.ossafe(fp.close, "can't close: %s" % path)
    ch.DEBUG("manifest:\n%s" % text)
    try:
       manifest = json.loads(text)
    except json.JSONDecodeError as x:
       ch.FATAL("can't parse manifest file: %s:%d: %s"
                % (path, x.lineno, x.msg))
    # Check the schema version.
    try:
       version = manifest['schemaVersion']
    except KeyError:
       bad_key("schemaVersion")
    if (version not in {1, 2}):
       ch.FATAL("unsupported manifest schema version: %s" % repr(version))
    # Config hash, plus the key names used for the layer list below.
    #
    # FIXME: Manifest version 1 does not list a config blob. It does have
    # things (plural) that look like a config at history/v1Compatibility as
    # an embedded JSON string, but that has not been investigated.
    legacy = (version == 1)
    if (legacy):
       ch.WARNING("no config; manifest schema version 1")
       self.config_hash = None
       (key1, key2) = ("fsLayers", "blobSum")
    else:  # version == 2
       try:
          self.config_hash = ch.digest_trim(manifest["config"]["digest"])
       except KeyError:
          bad_key("config/digest")
       (key1, key2) = ("layers", "digest")
    # Layer hashes.
    if (key1 not in manifest):
       bad_key(key1)
    hashes = self.layer_hashes = list()
    for layer in manifest[key1]:
       if (key2 not in layer):
          bad_key("%s/%s" % (key1, key2))
       hashes.append(ch.digest_trim(layer[key2]))
    if (legacy):
       hashes.reverse()
Ejemplo n.º 8
0
def main(cli_):
   """Build an image from a Dockerfile as directed by the parsed command
      line cli_.

      Steps, in order: publish cli_ as the global cli; infer the Dockerfile
      path and image tag if not given; turn --build-arg items into a
      dictionary; reject URL contexts; create the global build environment
      env; read and parse the Dockerfile; count the stages into the global
      image_ct; walk the parse tree with Main_Loop; and finally verify that
      every build argument was consumed and report a summary.

      Exits with status 0 right after parsing if cli.parse_only is set."""

   # CLI namespace. :P
   global cli
   cli = cli_

   # Infer input file if needed.
   if (cli.file is None):
      cli.file = cli.context + "/Dockerfile"

   # Infer image name if needed: prefer the Dockerfile's extension, then the
   # name of the directory containing it.
   if (cli.tag is None):
      m = re.search(r"(([^/]+)/)?Dockerfile(\.(.+))?$",
                    os.path.abspath(cli.file))
      if (m is not None):
         if m.group(4):    # extension
            cli.tag = m.group(4)
         elif m.group(2):  # containing directory
            cli.tag = m.group(2)

   # Deal with build arguments.
   def build_arg_get(arg):
      # Split on the first "=" only, so that values which themselves
      # contain "=" (e.g. "FOO=a=b") are kept intact rather than being
      # mistaken for a bare name to look up in the environment.
      kv = arg.split("=", 1)
      if (len(kv) == 2):
         return kv
      else:
         v = os.getenv(kv[0])
         if (v is None):
            ch.FATAL("--build-arg: %s: no value and not in environment" % kv[0])
         return (kv[0], v)
   if (cli.build_arg is None):
      cli.build_arg = list()
   cli.build_arg = dict( build_arg_get(i) for i in cli.build_arg )

   # Finish CLI initialization.
   ch.DEBUG(cli)
   ch.dependencies_check()

   # Guess whether the context is a URL, and error out if so. This can be a
   # typical looking URL e.g. "https://..." or also something like
   # "user@host:...". The line noise in the second line of the regex is
   # to match this second form. Username and host characters from
   # https://tools.ietf.org/html/rfc3986.
   #
   # Note the escaped "+" in "git\+ssh": unescaped it would be a quantifier
   # and the scheme "git+ssh" would never match.
   if (re.search(r"""  ^((git|git\+ssh|http|https|ssh)://
                     | ^[\w.~%!$&'\(\)\*\+,;=-]+@[\w.~%!$&'\(\)\*\+,;=-]+:)""",
                 cli.context, re.VERBOSE) is not None):
      ch.FATAL("not yet supported: issue #773: URL context: %s" % cli.context)
   if (os.path.exists(cli.context + "/.dockerignore")):
      ch.WARNING("not yet supported, ignored: issue #777: .dockerignore file")

   # Set up build environment.
   global env
   env = Environment()

   # Read input file.
   if (cli.file == "-"):
      text = ch.ossafe(sys.stdin.read, "can't read stdin")
   else:
      fp = ch.open_(cli.file, "rt")
      text = ch.ossafe(fp.read, "can't read: %s" % cli.file)
      ch.ossafe(fp.close, "can't close: %s" % cli.file)

   # Parse it.
   parser = lark.Lark("?start: dockerfile\n" + ch.GRAMMAR,
                      parser="earley", propagate_positions=True)
   # Avoid Lark issue #237: lark.exceptions.UnexpectedEOF if the file does not
   # end in newline.
   text += "\n"
   try:
      tree = parser.parse(text)
   except lark.exceptions.UnexpectedInput as x:
      ch.DEBUG(x)  # noise about what was expected in the grammar
      ch.FATAL("can't parse: %s:%d,%d\n\n%s"
               % (cli.file, x.line, x.column, x.get_context(text, 39)))
   ch.DEBUG(tree.pretty())

   # Sometimes we exit after parsing.
   if (cli.parse_only):
      sys.exit(0)

   # Count the number of stages (i.e., FROM instructions)
   global image_ct
   image_ct = sum(1 for i in ch.tree_children(tree, "from_"))

   # Traverse the tree and do what it says.
   #
   # We don't actually care whether the tree is traversed breadth-first or
   # depth-first, but we *do* care that instruction nodes are visited in
   # order. Neither visit() nor visit_topdown() are documented as of
   # 2020-06-11 [1], but examining source code [2] shows that visit_topdown()
   # uses Tree.iter_trees_topdown(), which *is* documented to be in-order [3].
   #
   # This change seems to have been made in 0.8.6 (see PR #761); before then,
   # visit() was in order. Therefore, we call that instead, if visit_topdown()
   # is not present, to improve compatibility (see issue #792).
   #
   # [1]: https://lark-parser.readthedocs.io/en/latest/visitors/#visitors
   # [2]: https://github.com/lark-parser/lark/blob/445c8d4/lark/visitors.py#L211
   # [3]: https://lark-parser.readthedocs.io/en/latest/classes/#tree
   ml = Main_Loop()
   if (hasattr(ml, 'visit_topdown')):
      ml.visit_topdown(tree)
   else:
      ml.visit(tree)

   # Check that all build arguments were consumed.
   if (len(cli.build_arg) != 0):
      ch.FATAL("--build-arg: not consumed: " + " ".join(cli.build_arg.keys()))

   # Print summary & we're done.
   if (ml.instruction_ct == 0):
      ch.FATAL("no instructions found: %s" % cli.file)
   assert (image_i + 1 == image_ct)  # should have errored already if not
   ch.INFO("grown in %d instructions: %s"
           % (ml.instruction_ct, images[image_i]))
Ejemplo n.º 9
0
 def execute_(self):
    """Execute this COPY instruction.

       Sources are glob-expanded relative to the context, which is
       cli.context by default or the unpacked directory of another stage
       when --from is given. The destination is inside the current stage's
       image; a relative destination is taken relative to env.workdir.

       Exits via ch.FATAL if --from names the current or an unknown stage,
       if a source climbs outside the context (lexically or through a
       symlink), if no sources exist, or if a source or destination has an
       unsuitable file type. --chown is accepted but only warned about."""
    # Complain about unsupported stuff.
    if (self.options.pop("chown", False)):
       self.unsupported_forever_warn("--chown")
    # Any remaining options are invalid.
    self.options_assert_empty()
    # Find the source directory.
    if (self.from_ is None):
       context = cli.context
    else:
       if (self.from_ == image_i or self.from_ == image_alias):
          ch.FATAL("COPY --from: stage %s is the current stage" % self.from_)
       if (not self.from_ in images):
          # FIXME: Would be nice to also report if a named stage is below.
          if (isinstance(self.from_, int) and self.from_ < image_ct):
             if (self.from_ < 0):
                ch.FATAL("COPY --from: invalid negative stage index %d"
                         % self.from_)
             else:
                ch.FATAL("COPY --from: stage %d does not exist yet"
                         % self.from_)
          else:
             ch.FATAL("COPY --from: stage %s does not exist" % self.from_)
       context = images[self.from_].unpack_path
    ch.DEBUG("context: " + context)
    # Expand the sources.
    srcs = list()
    for src in self.srcs:
       # Compare whole path components: a plain prefix test on ".." would
       # also reject legitimate names that merely begin with two dots,
       # e.g. "..data".
       src_norm = os.path.normpath(src)
       if (src_norm == ".." or src_norm.startswith("../")):
          ch.FATAL("can't COPY: %s climbs outside context" % src)
       for i in glob.glob(context + "/" + src):
          srcs.append(i)
    if (len(srcs) == 0):
       ch.FATAL("can't COPY: no sources exist")
    # Work out the destination path inside the current image.
    dst = images[image_i].unpack_path + "/"
    if (not self.dst.startswith("/")):
       dst += env.workdir + "/"
    dst += self.dst
    if (dst.endswith("/") or len(srcs) > 1 or os.path.isdir(srcs[0])):
       # Create destination directory.
       if (dst.endswith("/")):
          dst = dst[:-1]
       if (os.path.exists(dst) and not os.path.isdir(dst)):
          ch.FATAL("can't COPY: %s exists but is not a directory" % dst)
       ch.mkdirs(dst)
    # The context does not change while copying, so resolve it once.
    context_real = os.path.realpath(context)
    for src in srcs:
       # Check for symlinks to outside context.
       src_real = os.path.realpath(src)
       if (not os.path.commonpath([src_real, context_real]) \
               .startswith(context_real)):
          ch.FATAL("can't COPY: %s climbs outside context via symlink" % src)
       # Do the copy.
       if (os.path.isfile(src)):   # or symlink to file
          ch.DEBUG("COPY via copy2 file %s to %s" % (src, dst))
          ch.copy2(src, dst, follow_symlinks=True)
       elif (os.path.isdir(src)):  # or symlink to directory
          # Copy *contents* of src, not src itself. Note: shutil.copytree()
          # has a parameter dirs_exist_ok that I think will make this easier
          # in Python 3.8.
          ch.DEBUG("COPY dir %s to %s" % (src, dst))
          if (not os.path.isdir(dst)):
             ch.FATAL("can't COPY: destination not a directory: %s to %s"
                      % (src, dst))
          for src2_basename in ch.ossafe(
                os.listdir, "can't list directory: %s" % src, src):
             src2 = src + "/" + src2_basename
             if (os.path.islink(src2)):
                # Symlinks within directories do not get dereferenced.
                ch.DEBUG("symlink via copy2: %s to %s" % (src2, dst))
                ch.copy2(src2, dst, follow_symlinks=False)
             elif (os.path.isfile(src2)):  # not symlink to file
                ch.DEBUG("file via copy2: %s to %s" % (src2, dst))
                ch.copy2(src2, dst)
             elif (os.path.isdir(src2)):   # not symlink to directory
                dst2 = dst + "/" + src2_basename
                ch.DEBUG("directory via copytree: %s to %s" % (src2, dst2))
                ch.copytree(src2, dst2, symlinks=True,
                            ignore_dangling_symlinks=True)
             else:
                ch.FATAL("can't COPY unknown file type: %s" % src2)
       else:
          ch.FATAL("can't COPY unknown file type: %s" % src)
Ejemplo n.º 10
0
    def manifest_load(self, continue_404=False):
        """Obtain the image manifest, parse it, and set self.config_hash and
         self.layer_hashes.

         The manifest comes from the internal library if the image reference
         is listed there; otherwise from the file at self.manifest_path,
         which is downloaded first unless it already exists and
         self.use_cache is set.

         continue_404 is passed through to the registry download. If no
         manifest file exists after the download step, the condition is
         logged and the method returns with self.config_hash and
         self.layer_hashes both None."""
        def bad_key(key):
            ch.FATAL("manifest: %s: no key: %s" % (self.manifest_path, key))

        self.config_hash = None
        self.layer_hashes = None
        # Get hold of the manifest.
        try:
            # Internal manifest library, e.g. for "FROM scratch".
            manifest = manifests_internal[str(self.image.ref)]
            ch.INFO("manifest: using internal library")
        except KeyError:
            # Not internal: fetch the file if necessary, then parse it.
            if (ch.arch != "yolo" and self.architectures is not None):
                digest = self.architectures[ch.arch]
            else:
                digest = None
            ch.DEBUG("manifest digest: %s" % digest)
            cached = (os.path.exists(self.manifest_path) and self.use_cache)
            if (not cached):
                ch.INFO("manifest: downloading")
                self.registry.manifest_to_file(self.manifest_path,
                                               digest=digest,
                                               continue_404=continue_404)
            else:
                ch.INFO("manifest: using existing file")
            if (not os.path.exists(self.manifest_path)):
                # Response was 404 (or equivalent).
                ch.INFO("manifest: none found")
                return
            manifest = ch.json_from_file(self.manifest_path, "manifest")
        # Check the schema version.
        try:
            version = manifest['schemaVersion']
        except KeyError:
            bad_key("schemaVersion")
        if (version not in {1, 2}):
            ch.FATAL("unsupported manifest schema version: %s" % repr(version))
        # Config hash, plus the key names used for the layer list below.
        #
        # FIXME: Manifest version 1 does not list a config blob. It does have
        # things (plural) that look like a config at history/v1Compatibility
        # as an embedded JSON string, but that has not been investigated.
        legacy = (version == 1)
        if (legacy):
            ch.VERBOSE("no config; manifest schema version 1")
            self.config_hash = None
            (key1, key2) = ("fsLayers", "blobSum")
        else:  # version == 2
            try:
                raw = manifest["config"]["digest"]
                self.config_hash = raw
                if (raw is not None):
                    self.config_hash = ch.digest_trim(raw)
            except KeyError:
                bad_key("config/digest")
            (key1, key2) = ("layers", "digest")
        # Layer hashes.
        if (key1 not in manifest):
            bad_key(key1)
        hashes = self.layer_hashes = list()
        for layer in manifest[key1]:
            if (key2 not in layer):
                bad_key("%s/%s" % (key1, key2))
            hashes.append(ch.digest_trim(layer[key2]))
        if (legacy):
            hashes.reverse()
Ejemplo n.º 11
0
    def copy_src_dir(self, src, dst):
        """Copy the contents of directory src, named by COPY, either explicitly
         or with wildcards, to dst. src might be a symlink, but dst is a
         canonical path. Both must be at the top level of the COPY
         instruction; i.e., this function must not be called recursively. dst
         must exist already and be a directory. Unlike subdirectories, the
         metadata of dst will not be altered to match src.

         Within the tree, symlinks (including symlinks to directories) are
         copied as symlinks rather than followed. Anything already at a
         destination path that is in the way (a non-directory where a
         directory is needed, or anything at all where a file is needed) is
         removed first. A source entry that is neither a regular file nor a
         symlink is a fatal error, as is a directory that cannot be
         scanned."""
        def onerror(x):
            # os.walk() error callback: x is the OSError raised while
            # listing a directory.
            ch.FATAL("can't scan directory: %s: %s" % (x.filename, x.strerror))

        # Use Path objects in this method because the path arithmetic was
        # getting too hard with strings.
        #
        # src is resolved with realpath() first, so a symlink given as the
        # top-level source is followed.
        src = ch.Path(os.path.realpath(src))
        dst = ch.Path(dst)
        assert (os.path.isdir(src) and not os.path.islink(src))
        assert (os.path.isdir(dst) and not os.path.islink(dst))
        ch.DEBUG("copying named directory: %s -> %s" % (src, dst))
        for (dirpath, dirnames, filenames) in os.walk(src, onerror=onerror):
            dirpath = ch.Path(dirpath)
            # Path of the directory being visited relative to src, and the
            # corresponding directory under dst.
            subdir = dirpath.relative_to(src)
            dst_dir = dst // subdir
            # dirnames can contain symlinks, which we handle as files, so we'll
            # rebuild it; the walk will not descend into those "directories".
            # The list must be modified in place (not rebound) because
            # os.walk() consults the same list object to decide where to
            # descend next.
            dirnames2 = dirnames.copy()  # shallow copy
            dirnames[:] = list()  # clear in place
            for d in dirnames2:
                d = ch.Path(d)
                src_path = dirpath // d
                dst_path = dst_dir // d
                ch.TRACE("dir: %s -> %s" % (src_path, dst_path))
                if (os.path.islink(src_path)):
                    filenames.append(d)  # symlink, handle as file
                    ch.TRACE("symlink to dir, will handle as file")
                    continue
                else:
                    dirnames.append(d)  # directory, descend into later
                # If destination exists, but isn't a directory, remove it.
                # NOTE(review): os.path.exists() follows symlinks, so a
                # dangling symlink at dst_path looks absent here and below;
                # confirm whether that case needs handling.
                if (os.path.exists(dst_path)):
                    if (os.path.isdir(dst_path)
                            and not os.path.islink(dst_path)):
                        ch.TRACE("dst_path exists and is a directory")
                    else:
                        ch.TRACE("dst_path exists, not a directory, removing")
                        ch.unlink(dst_path)
                # If destination directory doesn't exist, create it.
                if (not os.path.exists(dst_path)):
                    ch.TRACE("mkdir dst_path")
                    ch.ossafe(os.mkdir, "can't mkdir: %s" % dst_path, dst_path)
                # Copy metadata, now that we know the destination exists and is a
                # directory.
                ch.ossafe(shutil.copystat,
                          "can't copy metadata: %s -> %s" %
                          (src_path, dst_path),
                          src_path,
                          dst_path,
                          follow_symlinks=False)
            # filenames now also holds the symlinks-to-directories moved over
            # from dirnames above.
            for f in filenames:
                f = ch.Path(f)
                src_path = dirpath // f
                dst_path = dst_dir // f
                ch.TRACE("file or symlink via copy2: %s -> %s" %
                         (src_path, dst_path))
                # Only regular files and symlinks can be copied; devices,
                # sockets, etc. are rejected.
                if (not (os.path.isfile(src_path)
                         or os.path.islink(src_path))):
                    ch.FATAL("can't COPY: unknown file type: %s" % src_path)
                # Clear whatever is in the way: a real directory is removed
                # recursively, anything else is unlinked.
                if (os.path.exists(dst_path)):
                    ch.TRACE("destination exists, removing")
                    if (os.path.isdir(dst_path)
                            and not os.path.islink(dst_path)):
                        ch.rmtree(dst_path)
                    else:
                        ch.unlink(dst_path)
                # follow_symlinks=False: a symlink source is copied as a
                # symlink, not as the file it points to.
                ch.copy2(src_path, dst_path, follow_symlinks=False)