Example 1
def _lookup_repo_storages_by_content_checksum(repo, checksums, model_class):
    assert checksums

    # There may be many duplicates of the checksums, so for performance reasons we are going
    # to use a union to select just one storage with each checksum.
    queries = []

    for counter, checksum in enumerate(checksums):
        query_alias = "q{0}".format(counter)

        candidate_subq = (
            ImageStorage.select(
                ImageStorage.id,
                ImageStorage.content_checksum,
                ImageStorage.image_size,
                ImageStorage.uuid,
                ImageStorage.cas_path,
                ImageStorage.uncompressed_size,
                ImageStorage.uploading,
            )
            .join(model_class)
            .where(model_class.repository == repo, ImageStorage.content_checksum == checksum)
            .limit(1)
            .alias(query_alias)
        )

        queries.append(ImageStorage.select(SQL("*")).from_(candidate_subq))

    assert queries
    return _basequery.reduce_as_tree(queries)
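The union-tree trick above depends on _basequery.reduce_as_tree, which is not shown here. A minimal sketch of what such a helper plausibly does, assuming peewee's | operator (which produces a UNION of two selects); the real helper may instead use UNION ALL:

def reduce_as_tree(queries):
    # Recursively union the queries pairwise, producing a balanced tree of
    # UNIONs rather than a deeply left-nested chain, which keeps the generated
    # SQL shallow even for large numbers of checksums.
    if len(queries) == 1:
        return queries[0]
    mid = len(queries) // 2
    return reduce_as_tree(queries[:mid]) | reduce_as_tree(queries[mid:])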
Example 2
def _orphaned_storage_query(candidate_ids):
    """ Returns the subset of the candidate ImageStorage IDs representing storages that are no
      longer referenced by images.
  """
    # Issue a union query to find all candidate storages that are still referenced. This
    # is much faster than the group_by and having call we used to use here.
    nonorphaned_queries = []
    for counter, candidate_id in enumerate(candidate_ids):
        query_alias = 'q{0}'.format(counter)

        # TODO: remove the join with Image once fully on the OCI data model.
        storage_subq = (ImageStorage.select(ImageStorage.id).join(Image).where(
            ImageStorage.id == candidate_id).limit(1).alias(query_alias))

        nonorphaned_queries.append(
            ImageStorage.select(SQL('*')).from_(storage_subq))

        manifest_storage_subq = (ImageStorage.select(
            ImageStorage.id).join(ManifestBlob).where(
                ImageStorage.id == candidate_id).limit(1).alias(query_alias))

        nonorphaned_queries.append(
            ImageStorage.select(SQL('*')).from_(manifest_storage_subq))

    # Build the set of storages that are missing. These storages are orphaned.
    nonorphaned_storage_ids = {
        storage.id
        for storage in _basequery.reduce_as_tree(nonorphaned_queries)
    }
    return list(candidate_ids - nonorphaned_storage_ids)
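Note that the final line relies on set difference, so candidate_ids must be passed as a set. A hypothetical call:

# Hypothetical usage with made-up ImageStorage IDs.
orphaned_ids = _orphaned_storage_query({1001, 1002, 1003})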
Example 3
    def placements_to_filtered_paths_set(placements_list):
        """
        Returns the list of paths to remove from storage, filtered from the given placements query
        by removing any CAS paths that are still referenced by storage(s) in the database.
        """
        if not placements_list:
            return set()

        with ensure_under_transaction():
            # Find the content checksums not referenced by other storages. Any that are still
            # referenced cannot be removed.
            content_checksums = set([
                placement.storage.content_checksum
                for placement in placements_list if placement.storage.cas_path
            ])

            unreferenced_checksums = set()
            if content_checksums:
                # Check the current image storage.
                query = ImageStorage.select(
                    ImageStorage.content_checksum
                ).where(
                    ImageStorage.content_checksum << list(content_checksums))
                is_referenced_checksums = set([
                    image_storage.content_checksum for image_storage in query
                ])
                if is_referenced_checksums:
                    logger.warning(
                        "GC attempted to remove CAS checksums %s, which are still IS referenced",
                        is_referenced_checksums,
                    )

                # Check the ApprBlob table as well.
                query = ApprBlob.select(ApprBlob.digest).where(
                    ApprBlob.digest << list(content_checksums))
                appr_blob_referenced_checksums = set(
                    [blob.digest for blob in query])
                if appr_blob_referenced_checksums:
                    logger.warning(
                        "GC attempted to remove CAS checksums %s, which are ApprBlob referenced",
                        appr_blob_referenced_checksums,
                    )

                unreferenced_checksums = (content_checksums -
                                          appr_blob_referenced_checksums -
                                          is_referenced_checksums)

            # Return all placements for all image storages found not at a CAS path or with a
            # content checksum that is no longer referenced.
            return {
                (
                    get_image_location_for_id(placement.location_id).name,
                    get_layer_path(placement.storage),
                    placement.storage.content_checksum,
                )
                for placement in placements_list
                if not placement.storage.cas_path
                or placement.storage.content_checksum in unreferenced_checksums
            }
Example 4
def _get_dangling_storage_count():
    storage_ids = set([current.id for current in ImageStorage.select()])
    referenced_by_image = set([image.storage_id for image in Image.select()])
    referenced_by_manifest = set([blob.blob_id for blob in ManifestBlob.select()])
    referenced_by_derived = set(
        [derived.derivative_id for derived in DerivedStorageForImage.select()]
    )
    return len(storage_ids - referenced_by_image - referenced_by_derived - referenced_by_manifest)
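The count is plain set arithmetic: a storage is dangling when no Image, ManifestBlob, or DerivedStorageForImage row points at it. A worked toy example with hypothetical IDs:

storage_ids = {1, 2, 3, 4}       # hypothetical ImageStorage IDs
referenced_by_image = {1}
referenced_by_manifest = {2}
referenced_by_derived = {3}
# Only storage 4 is unreferenced, so the dangling count is 1.
assert len(storage_ids - referenced_by_image - referenced_by_derived - referenced_by_manifest) == 1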
Example 5
def lookup_repo_storages_by_content_checksum(repo,
                                             checksums,
                                             by_manifest=False):
    """
    Looks up repository storages (without placements) matching the given repository and checksum.
    """
    if not checksums:
        return []

    # There may be many duplicates of the checksums, so for performance reasons we are going
    # to use a union to select just one storage with each checksum.
    queries = []

    for counter, checksum in enumerate(set(checksums)):
        query_alias = "q{0}".format(counter)

        # TODO: Remove once we have a new-style model for tracking temp uploaded blobs and
        # all legacy tables have been removed.
        if by_manifest:
            candidate_subq = (ImageStorage.select(
                ImageStorage.id,
                ImageStorage.content_checksum,
                ImageStorage.image_size,
                ImageStorage.uuid,
                ImageStorage.cas_path,
                ImageStorage.uncompressed_size,
                ImageStorage.uploading,
            ).join(ManifestBlob).where(
                ManifestBlob.repository == repo, ImageStorage.content_checksum
                == checksum).limit(1).alias(query_alias))
        else:
            candidate_subq = (ImageStorage.select(
                ImageStorage.id,
                ImageStorage.content_checksum,
                ImageStorage.image_size,
                ImageStorage.uuid,
                ImageStorage.cas_path,
                ImageStorage.uncompressed_size,
                ImageStorage.uploading,
            ).join(Image).where(Image.repository == repo,
                                ImageStorage.content_checksum ==
                                checksum).limit(1).alias(query_alias))

        queries.append(ImageStorage.select(SQL("*")).from_(candidate_subq))

    return _basequery.reduce_as_tree(queries)
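A hypothetical call; note that set(checksums) in the loop collapses duplicate digests before the union is built:

# The digests here are placeholders, not real checksums.
storages = lookup_repo_storages_by_content_checksum(
    repo, ["sha256:aaa", "sha256:aaa", "sha256:bbb"], by_manifest=True)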
Example 6
File: blob.py Project: quay/quay
def _lookup_blob_in_repository(repository, blob_digest):
    try:
        return (ImageStorage.select(
            ImageStorage.uuid).join(ManifestBlob).where(
                ManifestBlob.repository == repository,
                ImageStorage.content_checksum == blob_digest,
            ).get())
    except ImageStorage.DoesNotExist:
        return None
Example 7
def verify_placements():
    encountered = set()

    iterator = yield_random_entries(
        lambda: ImageStorage.select().where(ImageStorage.uploading == False),
        ImageStorage.id,
        1000,
        ImageStorage.select(fn.Max(ImageStorage.id)).scalar(),
        1,
    )

    for storage_row, abt, _ in iterator:
        if storage_row.id in encountered:
            continue

        encountered.add(storage_row.id)

        logger.info("Checking placements for storage `%s`", storage_row.uuid)
        try:
            with_locations = model.storage.get_storage_by_uuid(
                storage_row.uuid)
        except model.InvalidImageException:
            logger.exception("Could not find storage `%s`", storage_row.uuid)
            continue

        storage_path = model.storage.get_layer_path(storage_row)
        locations_to_check = set(with_locations.locations)
        if locations_to_check:
            logger.info("Checking locations `%s` for storage `%s`",
                        locations_to_check, storage_row.uuid)
            for location in locations_to_check:
                logger.info("Checking location `%s` for storage `%s`",
                            location, storage_row.uuid)
                if not storage.exists([location], storage_path):
                    location_row = _get_location_row(location)
                    logger.info(
                        "Location `%s` is missing for storage `%s`; removing",
                        location,
                        storage_row.uuid,
                    )
                    (ImageStoragePlacement.delete().where(
                        ImageStoragePlacement.storage == storage_row,
                        ImageStoragePlacement.location == location_row,
                    ).execute())
Example 8
def _lookup_blob_uploaded(repository, blob_digest):
    try:
        return (ImageStorage.select(
            ImageStorage.uuid).join(UploadedBlob).where(
                UploadedBlob.repository == repository,
                ImageStorage.content_checksum == blob_digest,
                ImageStorage.uploading == False,
            ).get())
    except ImageStorage.DoesNotExist:
        return None
Example 9
def populate_storage_for_gc():
    """
    Populate FakeStorage with dummy data for each ImageStorage row.
    """
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        content = b"hello world"
        storage.put_content({preferred}, storage.blob_path(storage_row.content_checksum), content)
        assert storage.exists({preferred}, storage.blob_path(storage_row.content_checksum))

    yield
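The trailing yield suggests this is a pytest fixture. A hypothetical test that depends on it, assuming the function is registered with @pytest.fixture:

def test_gc_removes_blobs(populate_storage_for_gc):
    # Every ImageStorage row now has backing content in the storage engine.
    ...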
Example 10
def get_repository_blob_by_digest(repository, blob_digest):
    """ Find the content-addressable blob linked to the specified repository.
  """
    assert blob_digest
    try:
        storage = (ImageStorage.select(ImageStorage.uuid).join(Image).where(
            Image.repository == repository,
            ImageStorage.content_checksum == blob_digest,
            ImageStorage.uploading == False).get())

        return storage_model.get_storage_by_uuid(storage.uuid)
    except (ImageStorage.DoesNotExist, InvalidImageException):
        raise BlobDoesNotExist(
            'Blob does not exist with digest: {0}'.format(blob_digest))
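A hypothetical caller would catch the raised error and map it to an HTTP 404; the Flask abort here is an assumption, not taken from the snippet:

from flask import abort  # assuming a Flask request handler

try:
    found = get_repository_blob_by_digest(repository, blob_digest)
except BlobDoesNotExist:
    abort(404)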
Example 11
def store_tag_manifest(namespace, repo_name, tag_name, image_id):
    builder = DockerSchema1ManifestBuilder(namespace, repo_name, tag_name)
    storage_id_map = {}
    try:
        image_storage = ImageStorage.select().where(
            ~(ImageStorage.content_checksum >> None)).get()
        builder.add_layer(image_storage.content_checksum, '{"id": "foo"}')
        storage_id_map[image_storage.content_checksum] = image_storage.id
    except ImageStorage.DoesNotExist:
        pass

    manifest = builder.build(docker_v2_signing_key)
    manifest_row, _ = model.tag.store_tag_manifest_for_testing(
        namespace, repo_name, tag_name, manifest, image_id, storage_id_map)
    return manifest_row
Example 12
def get_repository_blob_by_digest(repository, blob_digest):
    """ Find the content-addressable blob linked to the specified repository and
      returns it or None if none.
  """
    try:
        storage = (ImageStorage.select(
            ImageStorage.uuid).join(ManifestBlob).where(
                ManifestBlob.repository == repository,
                ImageStorage.content_checksum == blob_digest,
                ImageStorage.uploading == False).get())

        return get_storage_by_uuid(storage.uuid)
    except (ImageStorage.DoesNotExist, InvalidImageException):
        # TODO: Remove once we are no longer using the legacy tables.
        # Try the legacy call.
        try:
            return legacy_get(repository, blob_digest)
        except BlobDoesNotExist:
            return None
Example 13
def get_repo_blob_by_digest(namespace, repo_name, blob_digest):
    """ Find the content-addressable blob linked to the specified repository.
  """
    assert blob_digest
    try:
        storage = (ImageStorage.select(
            ImageStorage.uuid).join(Image).join(Repository).join(
                Namespace,
                on=(Namespace.id == Repository.namespace_user)).where(
                    Repository.name == repo_name,
                    Namespace.username == namespace,
                    ImageStorage.content_checksum == blob_digest,
                    ImageStorage.uploading == False,
                ).get())

        return storage_model.get_storage_by_uuid(storage.uuid)
    except (ImageStorage.DoesNotExist, InvalidImageException):
        raise BlobDoesNotExist(
            "Blob does not exist with digest: {0}".format(blob_digest))
Example 14
    def get_repo_blob_by_digest(self,
                                repository_ref,
                                blob_digest,
                                include_placements=False):
        """
        Returns the blob in the repository with the given digest.

        If the blob is a placeholder, downloads it from the upstream registry.
        Placeholder blobs are blobs that don't yet have an ImageStoragePlacement
        associated with them.

        Note that there may be multiple records in the same repository for the same blob digest, so
        the return value of this function may change.
        """
        blob = self._get_shared_storage(blob_digest)
        if blob is None:
            try:
                blob = (ImageStorage.select().join(ManifestBlob).where(
                    ManifestBlob.repository_id == repository_ref.id,
                    ImageStorage.content_checksum == blob_digest,
                ).get())
            except ImageStorage.DoesNotExist:
                return None

        try:
            ImageStoragePlacement.select().where(
                ImageStoragePlacement.storage == blob).get()
        except ImageStoragePlacement.DoesNotExist:
            try:
                self._download_blob(repository_ref, blob_digest)
            except BlobDigestMismatchException:
                raise UpstreamRegistryError("blob digest mismatch")
            except BlobTooLargeException as e:
                raise UpstreamRegistryError(
                    f"blob too large, max allowed is {e.max_allowed}")
            except BlobRangeMismatchException:
                raise UpstreamRegistryError("range mismatch")
            except BlobUploadException:
                raise UpstreamRegistryError("invalid blob upload")

        return super().get_repo_blob_by_digest(repository_ref, blob_digest,
                                               include_placements)
Example 15
def calculate_image_aggregate_size(ancestors_str, image_size, parent_image):
    ancestors = ancestors_str.split("/")[1:-1]
    if not ancestors:
        return image_size

    if parent_image is None:
        raise DataModelException("Could not load parent image")

    ancestor_size = parent_image.aggregate_size
    if ancestor_size is not None:
        return ancestor_size + image_size

    # Fallback to a slower path if the parent doesn't have an aggregate size saved.
    # TODO: remove this code if/when we do a full backfill.
    ancestor_size = (ImageStorage.select(fn.Sum(ImageStorage.image_size)).join(
        Image).where(Image.id << ancestors).scalar())
    if ancestor_size is None:
        return None

    return ancestor_size + image_size
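The ancestors_str format is a slash-delimited ID path with leading and trailing slashes, so the [1:-1] slice drops the empty edge segments. Worked examples with hypothetical IDs:

assert "/3/7/12/".split("/")[1:-1] == ["3", "7", "12"]  # ancestors 3, 7, 12
assert "/".split("/")[1:-1] == []                       # root image: no ancestors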
Example 16
    def test_connect_existing_blobs_to_new_manifest(self, create_repo):
        repo_ref = create_repo(self.orgname, self.upstream_repository,
                               self.user)
        input_manifest = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(UBI8_8_4_MANIFEST_SCHEMA2),
            DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
        )
        layer = input_manifest.manifest_dict["layers"][0]
        blob = ImageStorage.create(
            image_size=layer["size"],
            uncompressed_size=layer["size"],
            content_checksum=layer["digest"],
        )

        proxy_model = ProxyModel(
            self.orgname,
            self.upstream_repository,
            self.user,
        )
        proxy_model._create_manifest_and_retarget_tag(repo_ref, input_manifest,
                                                      self.tag)
        blob_count = (ImageStorage.select().where(
            ImageStorage.content_checksum == blob.content_checksum).count())
        assert blob_count == 1
Example 17
def assert_gc_integrity(expect_storage_removed=True):
    """
    Specialized assertion for ensuring that GC cleans up all dangling storages and labels, invokes
    the callback for images removed and doesn't invoke the callback for images *not* removed.
    """

    # Add a callback for when images are removed.
    removed_image_storages = []
    remove_callback = model.config.register_image_cleanup_callback(
        removed_image_storages.extend)

    # Store existing storages. We won't verify these for existence because they
    # were likely created as test data.
    existing_digests = set()
    for storage_row in ImageStorage.select():
        if storage_row.cas_path:
            existing_digests.add(storage_row.content_checksum)

    for blob_row in ApprBlob.select():
        existing_digests.add(blob_row.digest)

    # Store the number of dangling objects.
    existing_storage_count = _get_dangling_storage_count()
    existing_label_count = _get_dangling_label_count()
    existing_manifest_count = _get_dangling_manifest_count()

    # Yield to the GC test.
    with check_transitive_modifications():
        try:
            yield
        finally:
            remove_callback()

    # Ensure the number of dangling storages, manifests and labels has not changed.
    updated_storage_count = _get_dangling_storage_count()
    assert updated_storage_count == existing_storage_count

    updated_label_count = _get_dangling_label_count()
    assert updated_label_count == existing_label_count, _get_dangling_labels()

    updated_manifest_count = _get_dangling_manifest_count()
    assert updated_manifest_count == existing_manifest_count

    # Ensure that for each call to the image+storage cleanup callback, the image and its
    # storage is not found *anywhere* in the database.
    for removed_image_and_storage in removed_image_storages:
        assert isinstance(removed_image_and_storage, Image)

        try:
            # NOTE: SQLite can and will reuse AUTOINCREMENT IDs occasionally, so if we find a row
            # with the same ID, make sure it does not have the same Docker Image ID.
            # See: https://www.sqlite.org/autoinc.html
            found_image = Image.get(id=removed_image_and_storage.id)
            assert (found_image.docker_image_id !=
                    removed_image_and_storage.docker_image_id
                    ), "Found unexpected removed image %s under repo %s" % (
                        found_image.id,
                        found_image.repository,
                    )
        except Image.DoesNotExist:
            pass

        # Ensure that image storages are only removed if not shared.
        shared = Image.select().where(
            Image.storage == removed_image_and_storage.storage_id).count()
        if shared == 0:
            shared = (ManifestBlob.select().where(
                ManifestBlob.blob ==
                removed_image_and_storage.storage_id).count())

        if shared == 0:
            shared = (UploadedBlob.select().where(
                UploadedBlob.blob ==
                removed_image_and_storage.storage_id).count())

        if shared == 0:
            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(id=removed_image_and_storage.storage_id)

            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(uuid=removed_image_and_storage.storage.uuid)

    # Ensure all CAS storage is in the storage engine.
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        if storage_row.content_checksum in existing_digests:
            continue

        if storage_row.cas_path:
            storage.get_content({preferred},
                                storage.blob_path(
                                    storage_row.content_checksum))

    for blob_row in ApprBlob.select():
        if blob_row.digest in existing_digests:
            continue

        storage.get_content({preferred}, storage.blob_path(blob_row.digest))

    # Ensure all tags have valid manifests.
    for manifest in {t.manifest for t in Tag.select()}:
        # Ensure that the manifest's blobs all exist.
        found_blobs = {
            b.blob.content_checksum
            for b in ManifestBlob.select().where(
                ManifestBlob.manifest == manifest)
        }

        parsed = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(manifest.manifest_bytes),
            manifest.media_type.name)
        assert set(parsed.local_blob_digests) == found_blobs
Example 18
def garbage_collect_storage(storage_id_whitelist):
    """
    Performs GC on the subset of the storages whose IDs appear in the whitelist.

    Each whitelisted storage is checked, and any found to be orphaned are removed, with their IDs
    being returned.
    """
    if len(storage_id_whitelist) == 0:
        return []

    def placements_to_filtered_paths_set(placements_list):
        """
        Returns the list of paths to remove from storage, filtered from the given placements query
        by removing any CAS paths that are still referenced by storage(s) in the database.
        """
        if not placements_list:
            return set()

        with ensure_under_transaction():
            # Find the content checksums not referenced by other storages. Any that are still
            # referenced cannot be removed.
            content_checksums = set(
                [
                    placement.storage.content_checksum
                    for placement in placements_list
                    if placement.storage.cas_path
                ]
            )

            unreferenced_checksums = set()
            if content_checksums:
                # Check the current image storage.
                query = ImageStorage.select(ImageStorage.content_checksum).where(
                    ImageStorage.content_checksum << list(content_checksums)
                )
                is_referenced_checksums = set(
                    [image_storage.content_checksum for image_storage in query]
                )
                if is_referenced_checksums:
                    logger.warning(
                        "GC attempted to remove CAS checksums %s, which are still IS referenced",
                        is_referenced_checksums,
                    )

                # Check the ApprBlob table as well.
                query = ApprBlob.select(ApprBlob.digest).where(
                    ApprBlob.digest << list(content_checksums)
                )
                appr_blob_referenced_checksums = set([blob.digest for blob in query])
                if appr_blob_referenced_checksums:
                    logger.warning(
                        "GC attempted to remove CAS checksums %s, which are ApprBlob referenced",
                        appr_blob_referenced_checksums,
                    )

                unreferenced_checksums = (
                    content_checksums - appr_blob_referenced_checksums - is_referenced_checksums
                )

            # Return all placements for all image storages found not at a CAS path or with a
            # content checksum that is no longer referenced.
            return {
                (
                    get_image_location_for_id(placement.location_id).name,
                    get_layer_path(placement.storage),
                    placement.storage.content_checksum,
                )
                for placement in placements_list
                if not placement.storage.cas_path
                or placement.storage.content_checksum in unreferenced_checksums
            }

    # Note: Both of these deletes must occur in the same transaction (unfortunately) because a
    # storage without any placement is invalid, and a placement cannot exist without a storage.
    # TODO: We might want to allow for null storages on placements, which would allow us to
    # delete the storages, then delete the placements in a non-transaction.
    logger.debug("Garbage collecting storages from candidates: %s", storage_id_whitelist)
    paths_to_remove = []
    orphaned_storage_ids = set()
    for storage_id_to_check in storage_id_whitelist:
        logger.debug("Garbage collecting storage %s", storage_id_to_check)

        with db_transaction():
            if not _is_storage_orphaned(storage_id_to_check):
                continue

            orphaned_storage_ids.add(storage_id_to_check)

            placements_to_remove = list(
                ImageStoragePlacement.select(ImageStoragePlacement, ImageStorage)
                .join(ImageStorage)
                .where(ImageStorage.id == storage_id_to_check)
            )

            # Remove the placements for orphaned storages
            if placements_to_remove:
                ImageStoragePlacement.delete().where(
                    ImageStoragePlacement.storage == storage_id_to_check
                ).execute()

            # Remove all orphaned storages
            TorrentInfo.delete().where(TorrentInfo.storage == storage_id_to_check).execute()

            ImageStorageSignature.delete().where(
                ImageStorageSignature.storage == storage_id_to_check
            ).execute()

            ImageStorage.delete().where(ImageStorage.id == storage_id_to_check).execute()

            # Determine the paths to remove. We cannot simply remove all paths matching storages, as CAS
            # can share the same path. We further filter these paths by checking for any storages still in
            # the database with the same content checksum.
            paths_to_remove.extend(placements_to_filtered_paths_set(placements_to_remove))

    # We are going to make the conscious decision to not delete image storage blobs inside
    # transactions.
    # This may end up producing garbage in s3, trading off for higher availability in the database.
    paths_to_remove = list(set(paths_to_remove))
    for location_name, image_path, storage_checksum in paths_to_remove:
        if storage_checksum:
            # Skip any specialized blob digests that we know we should keep around.
            if storage_checksum in SPECIAL_BLOB_DIGESTS:
                continue

            # Perform one final check to ensure the blob is not needed.
            if (
                ImageStorage.select()
                .where(ImageStorage.content_checksum == storage_checksum)
                .exists()
            ):
                continue

        logger.debug("Removing %s from %s", image_path, location_name)
        config.store.remove({location_name}, image_path)

    return orphaned_storage_ids
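A hypothetical invocation; the whitelist would typically come from an earlier query for potentially unreferenced storage IDs:

removed_ids = garbage_collect_storage({4001, 4002, 4003})  # hypothetical IDs
logger.debug("GC removed orphaned storages: %s", removed_ids)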
Example 19
def assert_gc_integrity(expect_storage_removed=True, check_oci_tags=True):
    """ Specialized assertion for ensuring that GC cleans up all dangling storages
      and labels, invokes the callback for images removed and doesn't invoke the
      callback for images *not* removed.
  """
    # Add a callback for when images are removed.
    removed_image_storages = []
    model.config.register_image_cleanup_callback(removed_image_storages.extend)

    # Store the number of dangling storages and labels.
    existing_storage_count = _get_dangling_storage_count()
    existing_label_count = _get_dangling_label_count()
    existing_manifest_count = _get_dangling_manifest_count()
    yield

    # Ensure the number of dangling storages, manifests and labels has not changed.
    updated_storage_count = _get_dangling_storage_count()
    assert updated_storage_count == existing_storage_count

    updated_label_count = _get_dangling_label_count()
    assert updated_label_count == existing_label_count, _get_dangling_labels()

    updated_manifest_count = _get_dangling_manifest_count()
    assert updated_manifest_count == existing_manifest_count

    # Ensure that for each call to the image+storage cleanup callback, the image and its
    # storage is not found *anywhere* in the database.
    for removed_image_and_storage in removed_image_storages:
        with pytest.raises(Image.DoesNotExist):
            Image.get(id=removed_image_and_storage.id)

        # Ensure that image storages are only removed if not shared.
        shared = Image.select().where(
            Image.storage == removed_image_and_storage.storage_id).count()
        if shared == 0:
            shared = (ManifestBlob.select().where(
                ManifestBlob.blob ==
                removed_image_and_storage.storage_id).count())

        if shared == 0:
            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(id=removed_image_and_storage.storage_id)

            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(uuid=removed_image_and_storage.storage.uuid)

    # Ensure all CAS storage is in the storage engine.
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        if storage_row.cas_path:
            storage.get_content({preferred},
                                storage.blob_path(
                                    storage_row.content_checksum))

    for blob_row in ApprBlob.select():
        storage.get_content({preferred}, storage.blob_path(blob_row.digest))

    # Ensure there are no dangling OCI tags.
    if check_oci_tags:
        oci_tags = {t.id for t in Tag.select()}
        referenced_oci_tags = {t.tag_id for t in TagToRepositoryTag.select()}
        assert not oci_tags - referenced_oci_tags

    # Ensure all tags have valid manifests.
    for manifest in {t.manifest for t in Tag.select()}:
        # Ensure that the manifest's blobs all exist.
        found_blobs = {
            b.blob.content_checksum
            for b in ManifestBlob.select().where(
                ManifestBlob.manifest == manifest)
        }

        parsed = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(manifest.manifest_bytes),
            manifest.media_type.name)
        assert set(parsed.local_blob_digests) == found_blobs
Example 20
def repeated_storages():
    # ~(content_checksum >> None) selects rows whose checksum IS NOT NULL.
    storages = list(ImageStorage.select().where(~(ImageStorage.content_checksum >> None)).limit(5))
    return storages + storages
Example 21
def random_storages():
    return list(ImageStorage.select().where(~(ImageStorage.content_checksum >> None)).limit(10))
Example 22
from data.database import Image, ImageStorage
from peewee import JOIN, fn
from app import app

orphaned = (
    ImageStorage.select()
    .where(ImageStorage.uploading == False)
    .join(Image, JOIN.LEFT_OUTER)
    .group_by(ImageStorage)
    .having(fn.Count(Image.id) == 0)
)

counter = 0
for orphan in orphaned:
    counter += 1
    print(orphan.uuid)

print("Total orphaned storages: %s" % counter)
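The LEFT OUTER join combined with HAVING COUNT(...) = 0 is the classic anti-join for finding unreferenced rows. Roughly the SQL shape peewee would generate (an approximation, with table and column names assumed from the models):

# SELECT imagestorage.* FROM imagestorage
# LEFT OUTER JOIN image ON image.storage_id = imagestorage.id
# WHERE imagestorage.uploading = false
# GROUP BY imagestorage.id
# HAVING COUNT(image.id) = 0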
Example 23
from data.database import Image, ImageStorage

# Legacy peewee API: annotate(Image) adds a `count` attribute holding the
# number of joined Image rows for each storage.
query = ImageStorage.select().annotate(Image)

saved_bytes = 0
total_bytes = 0

for storage in query:
    if storage.image_size is not None:
        saved_bytes += (storage.count - 1) * storage.image_size
        total_bytes += storage.count * storage.image_size

print "Saved: %s" % saved_bytes
print "Total: %s" % total_bytes