Example 1
class SimpleRepoMan(RepoMan):
    """Use a simple static list of images to cache."""
    def __init__(self, body: Dict[str, Any]):
        """Create a new SimpleRepoMan.

        Parameters
        ----------
        body: JSON body for the SimpleRepoMan, as also defined in post.json.
          Contains a JSON list of images, each of which has an image_url
          and an image_name field.
        """
        self._images = DockerImageList()
        self._images.load(body["images"])

    async def desired_images(
            self, common_cache: List[CachedDockerImage]) -> DesiredImageList:
        """Return the static list of desired images.

        Parameters
        ----------
        common_cache: unused.  The current cache of images on all nodes.

        Returns
        -------
        List of images to cache, which come directly from the JSON post body.
        """
        return DesiredImageList(self._images, DockerImageList())
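For illustration, a minimal sketch of driving SimpleRepoMan, assuming a post body shaped the way the docstring above describes; the asyncio harness and the printed fields are assumptions, not part of the original module:

import asyncio

# Hypothetical body matching the schema the docstring describes.
body = {
    "images": [
        {
            "image_url": "docker.io/lsstsqre/sciplat-lab:w_2021_05",
            "image_name": "Weekly 2021_05",
        },
    ],
}

async def main() -> None:
    repoman = SimpleRepoMan(body)
    # SimpleRepoMan ignores the common cache, so an empty list suffices.
    dsi = await repoman.desired_images([])
    for image in dsi.desired_images:
        print(image.image_url, image.name)

asyncio.run(main())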
Example 2
    def to_dockerimagelist(self, name_is_tag: bool = False) -> DockerImageList:
        image_list = DockerImageList()
        nonempty_tags = [t for t in self.all_tags.copy() if t is not None]
        image_list.load(
            [
                {
                    "image_url": t.image_ref,
                    "image_hash": (t.digest or ""),
                    "name": t.tag if name_is_tag else t.display_name,
                }
                for t in nonempty_tags
            ]
        )
        return image_list
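A short usage sketch, mirroring how Example 3 below builds its dropdown list (the variable names here are illustrative):

# Hypothetical usage: label images by tag for a dropdown, as Example 3 does.
taglist = RubinTagList(all_tags)  # all_tags: a List[RubinTag]
dropdown_images = taglist.to_dockerimagelist(name_is_tag=True)

# The human-friendly variant, labelled by display name instead.
display_images = taglist.to_dockerimagelist()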
Example 3
    async def desired_images(
            self, common_cache: List[CachedDockerImage]) -> DesiredImageList:
        """Retrieve the list of images to pull, based on the registry.

        Parameters
        ----------
        common_cache: The current list of images available on all nodes
          associated with this cachemachine.

        Returns
        -------
        List of docker images to ensure are pulled, some of which may
        already be pulled and in the common cache.
        """
        # Sort the tags lexically and in reverse, which should give the
        # most recent builds above the older builds.  At this point, all
        # the dailies, weeklies, releases, and recommended are in here.
        tags = sorted(await self.docker_client.list_tags(), reverse=True)

        pull_images = DockerImageList()

        all_tags: List[RubinTag] = []

        hashcache = RubinHashCache.from_cache(common_cache)
        for t in tags:
            # If there are alias tags, we will replace this object later with
            # a richer one containing data from those tags.
            image_url = f"{self.registry_url}/{self.repo}:{t}"
            tagobj = RubinTag.from_tag(
                tag=t,
                image_ref=image_url,
                alias_tags=self.alias_tags,
                override_name="",
                digest=hashcache.tag_to_hash.get(t),
            )
            if t in self.alias_tags:
                tag_cycle: Optional[int] = None
                image_hash = await self.docker_client.get_image_hash(t)
                # Now use the inverse hash cache we built to get any other
                # tags corresponding to that digest.
                display_name = RubinTag.prettify_tag(t)
                other_tags = hashcache.hash_to_tags.get(image_hash)
                if other_tags:
                    other_tagobjs: Set[RubinTag] = set()
                    for other_tag in other_tags:
                        candidate = RubinTag.from_tag(
                            tag=other_tag,
                            image_ref=(f"{self.registry_url}/{self.repo}" +
                                       f":{other_tag}"),
                            digest=image_hash,
                            alias_tags=self.alias_tags,
                        )
                        if candidate.is_recognized():
                            # Only add recognized, resolved images
                            other_tagobjs.add(candidate)
                        # Use the candidate cycle if it is set.
                        # Unless something is really wrong, we won't have
                        # different cycle numbers for the same image
                        if candidate.cycle:
                            tag_cycle = candidate.cycle
                    more_names = sorted(
                        [x.display_name for x in other_tagobjs], reverse=True)
                    display_name += f" ({', '.join(more_names)})"
                # Now that we know more about the tagged image, recreate
                # the RubinTag object with the additional info.
                tagobj = RubinTag.from_tag(
                    tag=t,
                    image_ref=image_url,
                    alias_tags=self.alias_tags,
                    override_name=display_name,
                    digest=image_hash,
                    override_cycle=tag_cycle,
                )
            if self.verify_tagobj_cycle(tagobj):
                # If we are in a cycle-aware environment, only use the
                # recommended or aliased image if the cycle matches.
                if t == self.recommended_tag:
                    # The point of the "recommended_tag" is that it is always
                    # pulled and put at the front of the pulled-image list.
                    # We want to do this check after we resolve aliases
                    # so that the tag object has a digest and the accurately-
                    # resolved display name.
                    pull_images.insert(
                        0,  # At the front (not that it matters here)
                        DockerImage(
                            image_url=tagobj.image_ref,
                            image_hash=tagobj.digest,
                            name=tagobj.display_name,
                        ),
                    )
                elif t in self.alias_tags:
                    # Alias tags go after recommended but before the other
                    # images.
                    pull_images.append(
                        DockerImage(
                            image_url=tagobj.image_ref,
                            image_hash=tagobj.digest,
                            name=tagobj.display_name,
                        )
                    )
                all_tags.append(tagobj)

        taglist = RubinTagList(all_tags)

        # Note that for the dropdown, we want to display the tag, rather
        # than its associated display name.
        all_images = taglist.to_dockerimagelist(name_is_tag=True)
        pull_images.extend(
            taglist.sorted_images(RubinTagType.RELEASE,
                                  count=self.num_releases))
        pull_images.extend(
            taglist.sorted_images(RubinTagType.WEEKLY,
                                  count=self.num_weeklies))
        pull_images.extend(
            taglist.sorted_images(RubinTagType.DAILY, count=self.num_dailies))
        logger.info(f"Returning {pull_images}")
        return DesiredImageList(pull_images, all_images)
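The method above treats RubinHashCache as a two-way index between tags and digests, built from the common cache. A minimal sketch of the shape this code assumes (the class name and construction details below are assumptions; the real implementation may differ):

from collections import defaultdict
from dataclasses import dataclass, field
from typing import Dict, List

@dataclass
class RubinHashCacheSketch:
    """Assumed shape of RubinHashCache: a two-way tag/digest index."""

    # tag -> image digest
    tag_to_hash: Dict[str, str] = field(default_factory=dict)
    # image digest -> all tags known to point at it
    hash_to_tags: Dict[str, List[str]] = field(
        default_factory=lambda: defaultdict(list)
    )

    @classmethod
    def from_cache(
        cls, common_cache: List["CachedDockerImage"]
    ) -> "RubinHashCacheSketch":
        cache = cls()
        for img in common_cache:
            # The tag is whatever follows the last colon of the image url.
            tag = img.image_url.rsplit(":", 1)[-1]
            for t in [tag, *img.tags]:
                cache.tag_to_hash[t] = img.image_hash
                cache.hash_to_tags[img.image_hash].append(t)
        return cache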
Example 4
class CacheMachine:
    def __init__(
        self,
        name: str,
        labels: KubernetesLabels,
        repomen: Sequence[RepoMan],
    ):
        self.all_images = DockerImageList()
        self.available_images = DockerImageList()
        self.common_cache = DockerImageList()
        self.desired_images = DockerImageList()
        self.images_to_cache = DockerImageList()

        self.name = name
        self.labels = labels
        self.repomen = repomen

        self.kubernetes = KubernetesClient()

    # Note: this coroutine never returns; it is intended to run forever.
    async def do_work(self) -> None:
        while True:
            try:
                all_images = DockerImageList()
                available_images = DockerImageList()
                desired_images = DockerImageList()
                images_to_cache = DockerImageList()

                self.inspect_node_caches()

                for r in self.repomen:
                    dsi = await r.desired_images(self.common_cache)

                    for image in dsi.desired_images:
                        desired_images.append(image)

                        available = False
                        for i in self.common_cache:
                            if i.image_url == image.image_url and (
                                    image.image_hash is None
                                    or i.image_hash == image.image_hash):
                                available_images.append(image)
                                available = True

                        if not available:
                            images_to_cache.append(image)

                    all_images.extend(dsi.all_images)

                if not self.caching() and images_to_cache:
                    self.start_caching(images_to_cache[0].image_url)

                self.all_images = all_images
                self.available_images = available_images
                self.desired_images = desired_images
                self.images_to_cache = images_to_cache
            except Exception:
                logger.exception("Exception caching images")

            await _wait()

    def inspect_node_caches(self) -> None:
        nodes = self.kubernetes.list_nodes()

        first_node = True
        common_cache = DockerImageList()

        for n in nodes:
            # Skip nodes that are cordoned or tainted.  They're irrelevant:
            # labs don't spawn there, and our caching can't affect them
            # because our DaemonSet won't run there.
            if n.spec.unschedulable or n.spec.taints:
                continue

            # Do the labels we are looking for match this node?
            if self.labels.matches(n.metadata.labels):
                # This is a bit tricky.  The images field is a list; each
                # item describes a particular image and holds a list of all
                # the names it is known by.
                node_images = DockerImageList()
                for i in n.status.images:
                    # Each of these "names" can either be a docker image
                    # url that has a hash or a tag in it. (although, with
                    # where the @ sign is, I'm not sure if it really
                    # counts).  Also, images that have the same hash (and
                    # are therefore the same image), but different
                    # repositories can exist if they share that hash.
                    # But each repository has an entry with the hash and
                    # additional entries with the tags.
                    # Example:
                    #
                    # ['docker.io/lsstsqre/sciplat-lab@sha256:be4...a7',
                    #  'registry.hub.docker.com/lsstsqre/sciplat-lab@sha256:be4...a7',
                    #  'docker.io/lsstsqre/sciplat-lab:recommended',
                    #  'registry.hub.docker.com/lsstsqre/sciplat-lab:recommended',
                    #  'registry.hub.docker.com/lsstsqre/sciplat-lab:w_2021_05']

                    # Let's store everything by repository, then collate the
                    # tags and hash.
                    entries: Dict[str, ImageEntry] = defaultdict(ImageEntry)
                    for url in i.names:
                        if url == "<none>@<none>" or url == "<none>:<none>":
                            pass
                        elif "@sha256:" in url:
                            (repository, image_hash) = url.split("@")
                            entries[repository].image_hash = image_hash
                        else:
                            (repository, new_tag) = url.split(":")
                            if new_tag not in entries[repository].tags:
                                entries[repository].tags.append(new_tag)

                    for repository, ie in entries.items():
                        for t in ie.tags:
                            other_tags = list(ie.tags)
                            other_tags.remove(t)

                            if ie.image_hash is None:
                                logger.debug(
                                    f"{repository} : {ie.tags} has no hash")
                            else:
                                node_images.append(
                                    CachedDockerImage(
                                        image_url=f"{repository}:{t}",
                                        image_hash=ie.image_hash,
                                        tags=other_tags,
                                    ))

                if first_node:
                    # This is the first node we're looking at
                    common_cache = node_images
                    first_node = False
                else:
                    # Calculate what images are available on this node and all
                    # the previously inspected nodes.
                    new_common_cache = DockerImageList()

                    for common_image in common_cache:
                        for node_image in node_images:
                            if (common_image.image_hash
                                    == node_image.image_hash
                                    and common_image.image_url
                                    == node_image.image_url):
                                # Same url and hash on both nodes: take the
                                # union of their tags, since the image could
                                # be known by any of the tags found.
                                for t in node_image.tags:
                                    if t not in common_image.tags:
                                        common_image.tags.append(t)

                                new_common_cache.append(common_image)

                    common_cache = new_common_cache

        self.common_cache = common_cache

    def start_caching(self, image_url: str) -> None:
        self.kubernetes.daemonset_create(
            self.name,
            image_url,
            Configuration().docker_secret_name,
            self.labels,
        )

    def caching(self) -> bool:
        try:
            finished = self.kubernetes.daemonset_finished(self.name)
            if finished:
                self.kubernetes.daemonset_delete(self.name)
                return False
            else:
                return True
        except KubernetesDaemonsetNotFound:
            return False

    def dump(self) -> Dict[str, Any]:
        return {
            "name": self.name,
            "labels": self.labels,
            "common_cache": self.common_cache.dump(),
            "all_images": self.all_images.dump(),
            "available_images": self.available_images.dump(),
            "desired_images": self.desired_images.dump(),
            "images_to_cache": self.images_to_cache.dump(),
        }
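Finally, a hedged sketch of running the class above under asyncio. The _wait helper is referenced in do_work but not defined above, so its body and interval are assumptions, as are the KubernetesLabels constructor and the label values:

import asyncio

async def _wait() -> None:
    # Assumed helper: pause between caching passes (the interval is a guess).
    await asyncio.sleep(60)

async def main() -> None:
    cm = CacheMachine(
        name="cachemachine",
        labels=KubernetesLabels({"kubernetes.io/arch": "amd64"}),  # guess
        repomen=[SimpleRepoMan({"images": []})],
    )
    # do_work never returns, so schedule it as a long-lived task.
    await asyncio.create_task(cm.do_work())

asyncio.run(main())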