Code example #1
 def __init__(self, *, api_key, project, **optional_config):
     """Set up registry clients, output paths, and previously saved state.

     :param api_key: NVIDIA registry API key; its length selects the
         backend (40 chars -> DGXRegistry, 84 chars -> NGCRegistry).
     :param project: default registry project/namespace to replicate.
     :param optional_config: free-form options read back via ``self.config``.
     :raises RuntimeError: if the API key length is not recognized.
     """
     log.info("Initializing Replicator")
     self._config = optional_config
     self.project = project
     self.service = self.config("service")
     # The key length distinguishes the legacy DGX registry from NGC.
     if len(api_key) == 40:
         self.nvcr = DGXRegistry(api_key)
     elif len(api_key) == 84:
         self.nvcr = NGCRegistry(api_key)
     else:
         raise RuntimeError("Unable to recognize the API key")
     self.nvcr_client = DockerClient()
     self.nvcr_client.login(username="******",
                            password=api_key,
                            registry="nvcr.io/v2")
     self.registry_client = None
     self.min_version = self.config("min_version")
     self.py_version = self.config("py_version")
     self.images = self.config("image") or []
     self.progress = Progress(uri=self.config("progress_uri"))
     # Optional push target: a private registry to mirror images into.
     if self.config("registry_url"):
         self.registry_url = self.config("registry_url")
         self.registry_client = DockerClient()
         if self.config("registry_username") and self.config(
                 "registry_password"):
             self.registry_client.login(
                 username=self.config("registry_username"),
                 password=self.config("registry_password"),
                 registry=self.config("registry_url"))
     self.output_path = self.config("output_path") or "/output"
     self.state_path = os.path.join(self.output_path, "state.yml")
     self.state = collections.defaultdict(dict)
     if os.path.exists(self.state_path):
         with open(self.state_path, "r") as file:
             # safe_load: the state file is plain data, and yaml.load
             # without an explicit Loader is unsafe (and a TypeError on
             # PyYAML >= 6).
             tmp = yaml.safe_load(file)
         if tmp:
             for key, val in tmp.items():
                 self.state[key] = val
     self.export_to_tarfile = self.config("exporter")
     self.third_party_images = []
     if self.config("external_images"):
         self.third_party_images.extend(self.read_external_images_file())
     if self.export_to_tarfile:
         log.info("tarfiles will be saved to {}".format(self.output_path))
     self.export_to_singularity = self.config("singularity")
     if self.export_to_singularity:
         log.info("singularity images will be saved to {}".format(
             self.output_path))
     log.info("Replicator initialization complete")
Code example #2
class Replicator:
    """Mirror Docker images from an NVIDIA container registry.

    Depending on configuration, each cloned image can be exported to a
    tarfile, converted to a Singularity image file, and/or pushed to a
    private registry.  Completed work is recorded in ``state.yml`` so an
    interrupted run resumes with only the images that are new or have
    changed upstream.
    """

    def __init__(self, *, api_key, project, **optional_config):
        """Set up registry clients, output paths, and previously saved state.

        :param api_key: NVIDIA registry API key; its length selects the
            backend (40 chars -> DGXRegistry, 84 chars -> NGCRegistry).
        :param project: default registry project/namespace to replicate.
        :param optional_config: free-form options read back via ``self.config``.
        :raises RuntimeError: if the API key length is not recognized.
        """
        log.info("Initializing Replicator")
        self._config = optional_config
        self.project = project
        self.service = self.config("service")
        # The key length distinguishes the legacy DGX registry from NGC.
        if len(api_key) == 40:
            self.nvcr = DGXRegistry(api_key)
        elif len(api_key) == 84:
            self.nvcr = NGCRegistry(api_key)
        else:
            raise RuntimeError("Unable to recognize the API key")
        self.nvcr_client = DockerClient()
        self.nvcr_client.login(username="******",
                               password=api_key,
                               registry="nvcr.io/v2")
        self.registry_client = None
        self.min_version = self.config("min_version")
        self.py_version = self.config("py_version")
        self.images = self.config("image") or []
        self.progress = Progress(uri=self.config("progress_uri"))
        # Optional push target: a private registry to mirror images into.
        if self.config("registry_url"):
            self.registry_url = self.config("registry_url")
            self.registry_client = DockerClient()
            if self.config("registry_username") and self.config(
                    "registry_password"):
                self.registry_client.login(
                    username=self.config("registry_username"),
                    password=self.config("registry_password"),
                    registry=self.config("registry_url"))
        self.output_path = self.config("output_path") or "/output"
        self.state_path = os.path.join(self.output_path, "state.yml")
        # state maps image_name -> {tag: docker_id} for everything cloned.
        self.state = collections.defaultdict(dict)
        if os.path.exists(self.state_path):
            with open(self.state_path, "r") as file:
                # safe_load: the state file is plain data, and yaml.load
                # without an explicit Loader is unsafe (and a TypeError on
                # PyYAML >= 6).
                tmp = yaml.safe_load(file)
            if tmp:
                for key, val in tmp.items():
                    self.state[key] = val
        self.export_to_tarfile = self.config("exporter")
        self.third_party_images = []
        if self.config("external_images"):
            self.third_party_images.extend(self.read_external_images_file())
        if self.export_to_tarfile:
            log.info("tarfiles will be saved to {}".format(self.output_path))
        self.export_to_singularity = self.config("singularity")
        if self.export_to_singularity:
            log.info("singularity images will be saved to {}".format(
                self.output_path))
        log.info("Replicator initialization complete")

    def read_external_images_file(self):
        """Load the YAML file named by the ``external_images`` option.

        :return: list of ``replicator_pb2.DockerImage`` protos; a missing
            ``tag`` defaults to "latest".
        """
        with open(self.config("external_images"), "r") as file:
            # safe_load: the file carries plain data, not python objects.
            data = yaml.safe_load(file)
        images = data.get("images", [])
        images = [
            replicator_pb2.DockerImage(name=image["name"],
                                       tag=image.get("tag", "latest"))
            for image in images
        ]
        return images

    def config(self, key, default=None):
        """Return the optional-config value for ``key`` (or ``default``)."""
        return self._config.get(key, default)

    def save_state(self):
        """Persist ``self.state`` to ``state.yml``.

        Dump a plain dict: dumping the defaultdict directly would emit
        python-specific tags that ``yaml.safe_load`` cannot read back.
        """
        with open(self.state_path, "w") as file:
            yaml.dump(dict(self.state), file)

    def sync(self, project=None):
        """Clone every missing image for ``project``, then fetch READMEs."""
        log.info("Replicator Started")

        # pull images
        new_images = {
            image.name: image.tag
            for image in self.sync_images(project=project)
        }

        # pull image descriptions - new_images should be empty for dry runs
        self.progress.update_step(key="markdown", status="running")
        self.update_progress()
        descriptions = self.nvcr.get_image_descriptions(project=project)
        for image_name in new_images:
            # '/' is not valid in a filename; encode it as '%%'.
            markdown = os.path.join(
                self.output_path,
                "description_{}.md".format(image_name.replace('/', '%%')))
            with open(markdown, "w") as out:
                out.write(descriptions.get(image_name, ""))
        self.progress.update_step(key="markdown", status="complete")
        self.update_progress()
        log.info("Replicator finished")

    def sync_images(self, project=None):
        """Clone each missing image, yielding it after its state is saved.

        In ``dry_run`` mode the clone is only echoed, not performed.
        """
        project = project or self.project
        for image in self.images_to_download(project=project):
            if self.config("dry_run"):
                click.echo("[dry-run] clone_image({}, {}, {})".format(
                    image.name, image.tag, image.docker_id))
                continue
            log.info("Pulling {}:{}".format(image.name, image.tag))
            self.clone_image(image.name, image.tag,
                             image.docker_id)  # independent
            self.state[image.name][image.tag] = image.docker_id  # dep [clone]
            log.info("Saving state {}:{}".format(image.name, image.tag))
            # Save after every clone so an interrupted run can resume.
            self.save_state()
            yield image
        self.save_state()

    def images_to_download(self, project=None):
        """Yield DockerImage protos that the local state is missing.

        CUDA base images are yielded first so dependent images can build
        on them; configured external images are yielded last.
        """
        project = project or self.project

        self.progress.add_step(key="query",
                               status="running",
                               header="Getting list of Docker images to clone")
        self.update_progress(progress_length_unknown=True)

        # determine images and tags (and dockerImageIds) from the remote registry
        filter_fn = self.filter_on_tag if self.min_version or self.images else None
        remote_state = self.nvcr.get_state(project=project,
                                           filter_fn=filter_fn)

        # determine which images need to be fetch for the local state to match the remote
        to_pull = self.missing_images(remote_state)

        # sort images into two buckets: cuda and not cuda
        cuda_images = {
            key: val
            for key, val in to_pull.items() if key.endswith("cuda")
        }
        other_images = {
            key: val
            for key, val in to_pull.items() if not key.endswith("cuda")
        }

        # Register a progress step for every image before any cloning starts.
        all_images = list(self.images_from_state(cuda_images))
        all_images.extend(self.images_from_state(other_images))

        if self.config("external_images"):
            all_images.extend(self.third_party_images)

        for image in all_images:
            self.progress.add_step(key="{}:{}".format(image.name, image.tag),
                                   header="Cloning {}:{}".format(
                                       image.name, image.tag),
                                   subHeader="Waiting to pull image")
        self.progress.add_step(
            key="markdown", header="Downloading NVIDIA Deep Learning READMEs")
        self.progress.update_step(key="query", status="complete")
        self.update_progress()

        for image in self.images_from_state(cuda_images):
            yield image

        for image in self.images_from_state(other_images):
            yield image

        if self.config("external_images"):
            for image in self.third_party_images:
                yield image

    def update_progress(self, progress_length_unknown=False):
        """Push the current progress report to its configured sink."""
        self.progress.post(progress_length_unknown=progress_length_unknown)

    @staticmethod
    def images_from_state(state):
        """Yield DockerImage protos from a ``name -> {tag: entry}`` mapping.

        Each entry is expected to be a dict whose "docker_id" key may be
        absent, in which case an empty id is used.
        """
        for image_name, tag_data in state.items():
            for tag, docker_id in tag_data.items():
                yield replicator_pb2.DockerImage(name=image_name,
                                                 tag=tag,
                                                 docker_id=docker_id.get(
                                                     "docker_id", ""))

    def clone_image(self, image_name, tag, docker_id):
        """Pull one image and export/push it per the active configuration.

        Exports a tarfile and/or a Singularity image, pushes to the
        private registry when one is configured, and finally removes the
        local copy (unless ``no_remove`` is set or the image is a CUDA
        base image, which later images may reuse).

        :return: the ``(image_name, tag, docker_id)`` triple, unchanged.
        """
        if docker_id:
            url = self.nvcr.docker_url(image_name, tag=tag)
        else:
            # No registry id: treat the name as an external/public image.
            url = "{}:{}".format(image_name, tag)
        if self.export_to_tarfile:
            tarfile = self.nvcr_client.url2filename(url)
            if os.path.exists(tarfile):
                log.warning(
                    "{} exists; removing and rebuilding".format(tarfile))
                os.remove(tarfile)
            log.info("cloning %s --> %s" % (url, tarfile))
            self.progress.update_step(key="{}:{}".format(image_name, tag),
                                      status="running",
                                      subHeader="Pulling image from Registry")
            self.update_progress()
            self.nvcr_client.pull(url)
            self.progress.update_step(key="{}:{}".format(image_name, tag),
                                      status="running",
                                      subHeader="Saving image to tarfile")
            self.update_progress()
            self.nvcr_client.save(url, path=self.output_path)
            self.progress.update_step(key="{}:{}".format(image_name, tag),
                                      status="complete",
                                      subHeader="Saved {}".format(tarfile))
            log.info("Saved image: %s --> %s" % (url, tarfile))
        if self.export_to_singularity:
            sif = os.path.join(self.output_path,
                               "{}.sif".format(url).replace("/", "_"))
            if os.path.exists(sif):
                log.warning("{} exists; removing and rebuilding".format(sif))
                os.remove(sif)
            log.info("cloning %s --> %s" % (url, sif))
            self.progress.update_step(key="{}:{}".format(image_name, tag),
                                      status="running",
                                      subHeader="Pulling image from Registry")
            self.update_progress()
            self.nvcr_client.pull(url)
            self.progress.update_step(
                key="{}:{}".format(image_name, tag),
                status="running",
                subHeader="Saving image to singularity image file")
            self.update_progress()
            utils.execute("singularity build {} docker-daemon://{}".format(
                sif, url))
            self.progress.update_step(key="{}:{}".format(image_name, tag),
                                      status="complete",
                                      subHeader="Saved {}".format(sif))
            log.info("Saved image: %s --> %s" % (url, sif))
        if self.registry_client:
            push_url = "{}/{}:{}".format(self.registry_url, image_name, tag)
            self.nvcr_client.pull(url)
            self.registry_client.tag(url, push_url)
            self.registry_client.push(push_url)
            self.registry_client.remove(push_url)
        if not self.config("no_remove") and not image_name.endswith(
                "cuda") and self.nvcr_client.get(url=url):
            try:
                self.nvcr_client.remove(url)
            except Exception:
                # Best-effort cleanup: a failed removal should not abort
                # the replication run.
                log.warning(
                    "tried to remove docker image {}, but unexpectedly failed".
                    format(url))
        return image_name, tag, docker_id

    def filter_on_tag(self, *, name, tag, docker_id):
        """
        Filter function used by the `nvidia_deepops` library for selecting images.

        Return True if the name/tag/docker_id combo should be included for consideration.
        Return False and the image will be excluded from consideration, i.e. not cloned/replicated.
        """
        if self.images:
            log.debug("filtering on images name, only allow {}".format(
                self.images))
            found = False
            for image in self.images:
                image = "{}/{}".format(self.project, image)
                if image == name:  # Using exact tag filtering instead of 'in'
                    log.debug("{} passes filter; matches {}".format(
                        name, image))
                    found = True
            if not found:
                log.debug("{} fails filter by image name".format(name))
                return False
        # if you are here, you have passed the name test
        # now, we check the version of the container by trying to extract the YY.MM details from the tag
        if self.py_version:
            if tag.find(self.py_version) == -1:
                log.debug("tag {} fails py_version {} filter".format(
                    tag, self.py_version))
                return False
        version_regex = re.compile(r"^(\d\d\.\d\d)")
        float_tag = version_regex.findall(tag)
        if float_tag and len(float_tag) == 1:
            try:
                # this is a bit ugly, but if for some reason the cast of float_tag[0] or min_version fail
                # we fallback to safety and skip tag filtering
                val = float(float_tag[0])
                lower_bound = float(self.min_version)
                if val < lower_bound:
                    return False
            except Exception:
                pass
        # if you are here, you have passed the tag test
        return True

    def missing_images(self, remote):
        """
        Generates a dict of dicts on a symmetric difference between remote/local which also includes
        any image/tag pair in both but with differing dockerImageIds.
        :param remote: `image_name:tag:docker_id` of remote content
        :return: `image_name:tag:docker_id` for each missing or different entry in remote but not in local
        """
        to_pull = collections.defaultdict(dict)
        local = self.state

        # determine which images are not present
        image_names = set(remote.keys()) - set(local.keys())
        for image_name in image_names:
            to_pull[image_name] = remote[image_name]

        log.debug("image names not present: %s" % to_pull.keys())

        # determine which tags are not present
        for image_name, tag_data in remote.items():
            # .get avoids defaultdict's auto-insert, which would silently
            # add empty entries for remote-only images into self.state.
            tags = set(tag_data.keys()) - set(local.get(image_name, {}).keys())
            log.debug("tags not present for image {}: {}".format(
                image_name, tags))
            for tag in tags:
                to_pull[image_name][tag] = remote[image_name][tag]

        # determine if any name/tag pairs have a different dockerImageId than previously seen
        # this handles the cases where someone push a new images and overwrites a name:tag image
        for image_name, tag_data in remote.items():
            if image_name not in local:
                continue
            for tag, docker_id in tag_data.items():
                if tag not in local[image_name]:
                    continue
                if docker_id.get("docker_id") != local[image_name][tag]:
                    log.debug("%s:%s changed on server" % (image_name, tag))
                    to_pull[image_name][tag] = docker_id

        log.info("images to be fetched: %s" %
                 pprint.pformat(to_pull, indent=4))
        return to_pull