Example #1
File: storage.py Project: sabre1041/quay-1
def _lookup_repo_storages_by_content_checksum(repo, checksums, model_class):
    assert checksums

    # There may be many duplicates of the checksums, so for performance reasons we are going
    # to use a union to select just one storage with each checksum
    queries = []

    for counter, checksum in enumerate(checksums):
        query_alias = "q{0}".format(counter)

        candidate_subq = (
            ImageStorage.select(
                ImageStorage.id,
                ImageStorage.content_checksum,
                ImageStorage.image_size,
                ImageStorage.uuid,
                ImageStorage.cas_path,
                ImageStorage.uncompressed_size,
                ImageStorage.uploading,
            )
            .join(model_class)
            .where(model_class.repository == repo, ImageStorage.content_checksum == checksum)
            .limit(1)
            .alias(query_alias)
        )

        queries.append(ImageStorage.select(SQL("*")).from_(candidate_subq))

    assert queries
    return _basequery.reduce_as_tree(queries)
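The union trick above relies on the helper _basequery.reduce_as_tree, which is not shown in this excerpt. Below is a minimal sketch of such a helper, assuming peewee's `|` operator for building compound UNION selects; it is an illustrative reconstruction, not the quay implementation.

def reduce_as_tree(queries_to_reduce):
    # Combine a list of SELECTs into a balanced tree of UNIONs so the compound
    # query nests to depth O(log n) instead of a long left-deep chain.
    assert queries_to_reduce
    if len(queries_to_reduce) == 1:
        return queries_to_reduce[0]

    mid = len(queries_to_reduce) // 2
    left = reduce_as_tree(queries_to_reduce[:mid])
    right = reduce_as_tree(queries_to_reduce[mid:])
    return left | right  # peewee: `|` produces a UNION of the two selects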
Example #2
File: blob.py Project: jonathankingfc/quay
def get_or_create_shared_blob(digest, byte_data, storage):
    """
    Returns the ImageStorage blob with the given digest or, if not present, adds a row and writes
    the given byte data to the storage engine.

    This method is *only* to be used for shared blobs that are globally accessible, such as the
    special empty gzipped tar layer that Docker no longer pushes to us.
    """
    assert digest
    assert byte_data is not None and isinstance(byte_data, bytes)
    assert storage

    try:
        return ImageStorage.get(content_checksum=digest)
    except ImageStorage.DoesNotExist:
        preferred = storage.preferred_locations[0]
        location_obj = ImageStorageLocation.get(name=preferred)

        record = ImageStorage.create(image_size=len(byte_data),
                                     content_checksum=digest)

        try:
            storage.put_content([preferred],
                                storage_model.get_layer_path(record),
                                byte_data)
            ImageStoragePlacement.create(storage=record, location=location_obj)
        except:
            logger.exception("Exception when trying to write special layer %s",
                             digest)
            record.delete_instance()
            raise

        return record
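A hedged usage sketch for the function above: build a small gzipped empty tar layer in memory, derive its digest, and register it as a shared blob. `storage` is assumed to be the application's already configured storage engine; everything else is standard library.

import gzip
import hashlib
import io

# An empty tar archive is two 512-byte zero blocks; gzip it deterministically.
buf = io.BytesIO()
with gzip.GzipFile(fileobj=buf, mode="wb", mtime=0) as gz:
    gz.write(b"\x00" * 1024)

layer_bytes = buf.getvalue()
digest = "sha256:" + hashlib.sha256(layer_bytes).hexdigest()

record = get_or_create_shared_blob(digest, layer_bytes, storage)
print(record.content_checksum, record.image_size)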
Example #3
def _orphaned_storage_query(candidate_ids):
    """ Returns the subset of the candidate ImageStorage IDs representing storages that are no
      longer referenced by images.
  """
    # Issue a union query to find all candidate storages that are still referenced by an image or a
    # manifest blob. This is much faster than the group_by and having call we used to use here.
    nonorphaned_queries = []
    for counter, candidate_id in enumerate(candidate_ids):
        query_alias = 'q{0}'.format(counter)

        # TODO: remove the join with Image once fully on the OCI data model.
        storage_subq = (ImageStorage.select(ImageStorage.id).join(Image).where(
            ImageStorage.id == candidate_id).limit(1).alias(query_alias))

        nonorphaned_queries.append(
            ImageStorage.select(SQL('*')).from_(storage_subq))

        manifest_storage_subq = (ImageStorage.select(
            ImageStorage.id).join(ManifestBlob).where(
                ImageStorage.id == candidate_id).limit(1).alias(query_alias))

        nonorphaned_queries.append(
            ImageStorage.select(SQL('*')).from_(manifest_storage_subq))

    # Build the set of storages that are missing. These storages are orphaned.
    nonorphaned_storage_ids = {
        storage.id
        for storage in _basequery.reduce_as_tree(nonorphaned_queries)
    }
    return list(candidate_ids - nonorphaned_storage_ids)
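A hedged call-site sketch: `candidate_ids` must be a set (the function subtracts another set from it), typically a batch of ImageStorage primary keys under consideration for garbage collection. The batch size here is illustrative.

candidate_ids = {row.id for row in ImageStorage.select(ImageStorage.id).limit(100)}
if candidate_ids:
    orphaned_ids = _orphaned_storage_query(candidate_ids)
    logger.debug("Found %d orphaned storages out of %d candidates",
                 len(orphaned_ids), len(candidate_ids))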
Example #4
def test_manifest_v2_shared_config_and_blobs(app, default_tag_policy):
    """
    Test that GCing a tag whose V2 manifest shares its config blob and some layer blobs with
    another manifest does NOT GC the shared config blob or layer blobs.
    """
    repo = model.repository.create_repository("devtable", "newrepo", None)
    manifest1, built1 = create_manifest_for_testing(repo,
                                                    differentiation_field="1",
                                                    include_shared_blob=True)
    manifest2, built2 = create_manifest_for_testing(repo,
                                                    differentiation_field="2",
                                                    include_shared_blob=True)

    assert set(built1.local_blob_digests).intersection(
        built2.local_blob_digests)
    assert built1.config.digest == built2.config.digest

    # Create tags pointing to the manifests.
    model.oci.tag.retarget_tag("tag1", manifest1)
    model.oci.tag.retarget_tag("tag2", manifest2)

    with assert_gc_integrity(expect_storage_removed=True):
        # Delete tag2.
        model.oci.tag.delete_tag(repo, "tag2")
        assert gc_now(repo)

    # Ensure the blobs for manifest1 still all exist.
    preferred = storage.preferred_locations[0]
    for blob_digest in built1.local_blob_digests:
        storage_row = ImageStorage.get(content_checksum=blob_digest)

        assert storage_row.cas_path
        storage.get_content({preferred},
                            storage.blob_path(storage_row.content_checksum))
Example #5
def test_manifest_backfill_broken_tag(clear_rows, initialized_db):
    """
    Tests backfilling a broken tag.
    """
    # Delete existing tag manifest so we can reuse the tag.
    TagManifestLabel.delete().execute()
    TagManifest.delete().execute()

    # Create a tag whose image references missing parent images.
    repo = model.repository.get_repository("devtable", "gargantuan")
    broken_image = Image.create(
        docker_image_id="foo",
        repository=repo,
        ancestors="/348723847234/",
        storage=ImageStorage.get(),
    )
    broken_image_tag = RepositoryTag.create(repository=repo,
                                            image=broken_image,
                                            name="broken")

    # Backfill the tag.
    assert backfill_tag(broken_image_tag)

    # Ensure we backfilled, even though we reference a broken manifest.
    tag_manifest = TagManifest.get(tag=broken_image_tag)

    map_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
    manifest = map_row.manifest
    assert manifest.manifest_bytes == tag_manifest.json_data

    tag = TagToRepositoryTag.get(repository_tag=broken_image_tag).tag
    assert tag.name == "broken"
    assert tag.manifest == manifest
Example #6
File: storage.py Project: epasham/quay-1
    def placements_to_filtered_paths_set(placements_list):
        """
        Returns the list of paths to remove from storage, filtered from the given placements query
        by removing any CAS paths that are still referenced by storage(s) in the database.
        """
        if not placements_list:
            return set()

        with ensure_under_transaction():
            # Find the content checksums not referenced by other storages. Any that are, we cannot
            # remove.
            content_checksums = set([
                placement.storage.content_checksum
                for placement in placements_list if placement.storage.cas_path
            ])

            unreferenced_checksums = set()
            if content_checksums:
                # Check the current image storage.
                query = ImageStorage.select(
                    ImageStorage.content_checksum
                ).where(
                    ImageStorage.content_checksum << list(content_checksums))
                is_referenced_checksums = set([
                    image_storage.content_checksum for image_storage in query
                ])
                if is_referenced_checksums:
                    logger.warning(
                        "GC attempted to remove CAS checksums %s, which are still IS referenced",
                        is_referenced_checksums,
                    )

                # Check the ApprBlob table as well.
                query = ApprBlob.select(ApprBlob.digest).where(
                    ApprBlob.digest << list(content_checksums))
                appr_blob_referenced_checksums = set(
                    [blob.digest for blob in query])
                if appr_blob_referenced_checksums:
                    logger.warning(
                        "GC attempted to remove CAS checksums %s, which are ApprBlob referenced",
                        appr_blob_referenced_checksums,
                    )

                unreferenced_checksums = (content_checksums -
                                          appr_blob_referenced_checksums -
                                          is_referenced_checksums)

            # Return the placements for all image storages that are not CAS-backed, plus those whose
            # content checksum is no longer referenced anywhere.
            return {
                (
                    get_image_location_for_id(placement.location_id).name,
                    get_layer_path(placement.storage),
                    placement.storage.content_checksum,
                )
                for placement in placements_list
                if not placement.storage.cas_path
                or placement.storage.content_checksum in unreferenced_checksums
            }
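A hedged sketch of how the returned (location name, path, checksum) tuples might be consumed. `placements` stands in for the placements query the caller already holds, and the `storage.remove` call is an assumption about the surrounding storage engine's API, not something shown in this excerpt.

for location_name, layer_path, checksum in placements_to_filtered_paths_set(placements):
    # Each tuple identifies a path that is safe to delete: either not CAS-backed,
    # or CAS-backed with a checksum no longer referenced in the database.
    logger.debug("Removing %s from location %s (checksum %s)",
                 layer_path, location_name, checksum)
    storage.remove({location_name}, layer_path)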
Example #7
File: storage.py Project: epasham/quay-1
def lookup_repo_storages_by_content_checksum(repo,
                                             checksums,
                                             by_manifest=False):
    """
    Looks up repository storages (without placements) matching the given repository and checksum.
    """
    if not checksums:
        return []

    # There may be many duplicates of the checksums, so for performance reasons we are going
    # to use a union to select just one storage with each checksum
    queries = []

    for counter, checksum in enumerate(set(checksums)):
        query_alias = "q{0}".format(counter)

        # TODO: Remove once we have a new-style model for tracking temp uploaded blobs and
        # all legacy tables have been removed.
        if by_manifest:
            candidate_subq = (ImageStorage.select(
                ImageStorage.id,
                ImageStorage.content_checksum,
                ImageStorage.image_size,
                ImageStorage.uuid,
                ImageStorage.cas_path,
                ImageStorage.uncompressed_size,
                ImageStorage.uploading,
            ).join(ManifestBlob).where(
                ManifestBlob.repository == repo, ImageStorage.content_checksum
                == checksum).limit(1).alias(query_alias))
        else:
            candidate_subq = (ImageStorage.select(
                ImageStorage.id,
                ImageStorage.content_checksum,
                ImageStorage.image_size,
                ImageStorage.uuid,
                ImageStorage.cas_path,
                ImageStorage.uncompressed_size,
                ImageStorage.uploading,
            ).join(Image).where(Image.repository == repo,
                                ImageStorage.content_checksum ==
                                checksum).limit(1).alias(query_alias))

        queries.append(ImageStorage.select(SQL("*")).from_(candidate_subq))

    return _basequery.reduce_as_tree(queries)
Example #8
def store_blob_record_and_temp_link_in_repo(
    repository_id,
    blob_digest,
    location_obj,
    byte_count,
    link_expiration_s,
    uncompressed_byte_count=None,
):
    """
    Store a record of the blob and temporarily link it to the specified repository.
    """
    assert blob_digest
    assert byte_count is not None

    with db_transaction():
        try:
            storage = ImageStorage.get(content_checksum=blob_digest)
            save_changes = False

            if storage.image_size is None:
                storage.image_size = byte_count
                save_changes = True

            if storage.uncompressed_size is None and uncompressed_byte_count is not None:
                storage.uncompressed_size = uncompressed_byte_count
                save_changes = True

            if save_changes:
                storage.save()

            ImageStoragePlacement.get(storage=storage, location=location_obj)
        except ImageStorage.DoesNotExist:
            storage = ImageStorage.create(
                content_checksum=blob_digest,
                uploading=False,
                image_size=byte_count,
                uncompressed_size=uncompressed_byte_count,
            )
            ImageStoragePlacement.create(storage=storage,
                                         location=location_obj)
        except ImageStoragePlacement.DoesNotExist:
            ImageStoragePlacement.create(storage=storage,
                                         location=location_obj)

        _temp_link_blob(repository_id, storage, link_expiration_s)
        return storage
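A hedged call-site sketch for the helper above, as it might be used once a blob upload has finished. `repo` and `storage` are assumed to come from the surrounding application, and the one-hour expiration is arbitrary.

import hashlib

uploaded_bytes = b"example blob contents"
digest = "sha256:" + hashlib.sha256(uploaded_bytes).hexdigest()
location = ImageStorageLocation.get(name=storage.preferred_locations[0])

blob_record = store_blob_record_and_temp_link_in_repo(
    repository_id=repo.id,
    blob_digest=digest,
    location_obj=location,
    byte_count=len(uploaded_bytes),
    link_expiration_s=3600,  # keep the temporary repository link for an hour
)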
Example #9
def _get_dangling_storage_count():
    storage_ids = set([current.id for current in ImageStorage.select()])
    referenced_by_image = set([image.storage_id for image in Image.select()])
    referenced_by_manifest = set([blob.blob_id for blob in ManifestBlob.select()])
    referenced_by_derived = set(
        [derived.derivative_id for derived in DerivedStorageForImage.select()]
    )
    return len(storage_ids - referenced_by_image - referenced_by_derived - referenced_by_manifest)
Example #10
    def _create_blob(self, digest: str, size: int, manifest_id: int,
                     repo_id: int):
        try:
            blob = ImageStorage.get(content_checksum=digest)
        except ImageStorage.DoesNotExist:
            # TODO: which size should we really be setting here?
            blob = ImageStorage.create(content_checksum=digest,
                                       image_size=size,
                                       compressed_size=size)
        try:
            ManifestBlob.get(manifest_id=manifest_id,
                             blob=blob,
                             repository_id=repo_id)
        except ManifestBlob.DoesNotExist:
            ManifestBlob.create(manifest_id=manifest_id,
                                blob=blob,
                                repository_id=repo_id)
        return blob
Example #11
File: blob.py Project: quay/quay
def _lookup_blob_in_repository(repository, blob_digest):
    try:
        return (ImageStorage.select(
            ImageStorage.uuid).join(ManifestBlob).where(
                ManifestBlob.repository == repository,
                ImageStorage.content_checksum == blob_digest,
            ).get())
    except ImageStorage.DoesNotExist:
        return None
Example #12
def verify_placements():
    encountered = set()

    iterator = yield_random_entries(
        lambda: ImageStorage.select().where(ImageStorage.uploading == False),
        ImageStorage.id,
        1000,
        ImageStorage.select(fn.Max(ImageStorage.id)).scalar(),
        1,
    )

    for storage_row, abt, _ in iterator:
        if storage_row.id in encountered:
            continue

        encountered.add(storage_row.id)

        logger.info("Checking placements for storage `%s`", storage_row.uuid)
        try:
            with_locations = model.storage.get_storage_by_uuid(
                storage_row.uuid)
        except model.InvalidImageException:
            logger.exception("Could not find storage `%s`", storage_row.uuid)
            continue

        storage_path = model.storage.get_layer_path(storage_row)
        locations_to_check = set(with_locations.locations)
        if locations_to_check:
            logger.info("Checking locations `%s` for storage `%s`",
                        locations_to_check, storage_row.uuid)
            for location in locations_to_check:
                logger.info("Checking location `%s` for storage `%s`",
                            location, storage_row.uuid)
                if not storage.exists([location], storage_path):
                    location_row = _get_location_row(location)
                    logger.info(
                        "Location `%s` is missing for storage `%s`; removing",
                        location,
                        storage_row.uuid,
                    )
                    (ImageStoragePlacement.delete().where(
                        ImageStoragePlacement.storage == storage_row,
                        ImageStoragePlacement.location == location_row,
                    ).execute())
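The helper _get_location_row is referenced above but not shown in this excerpt. A minimal sketch, under the assumption that it simply resolves the location record by name:

def _get_location_row(location):
    # Assumed behavior: look up the ImageStorageLocation row for a location name.
    return ImageStorageLocation.get(ImageStorageLocation.name == location)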
Example #13
def _lookup_blob_uploaded(repository, blob_digest):
    try:
        return (ImageStorage.select(
            ImageStorage.uuid).join(UploadedBlob).where(
                UploadedBlob.repository == repository,
                ImageStorage.content_checksum == blob_digest,
                ImageStorage.uploading == False,
            ).get())
    except ImageStorage.DoesNotExist:
        return None
Example #14
def get_shared_blob(digest):
    """ Returns the ImageStorage blob with the given digest or, if not present,
      returns None. This method is *only* to be used for shared blobs that are
      globally accessible, such as the special empty gzipped tar layer that Docker
      no longer pushes to us.
  """
    assert digest
    try:
        return ImageStorage.get(content_checksum=digest, uploading=False)
    except ImageStorage.DoesNotExist:
        return None
Example #15
File: image.py Project: quay/quay
def get_image_with_storage_and_parent_base():
    Parent = Image.alias()
    ParentImageStorage = ImageStorage.alias()

    return (
        Image.select(Image, ImageStorage, Parent, ParentImageStorage)
        .join(ImageStorage)
        .switch(Image)
        .join(Parent, JOIN.LEFT_OUTER, on=(Image.parent == Parent.id))
        .join(ParentImageStorage, JOIN.LEFT_OUTER, on=(ParentImageStorage.id == Parent.storage))
    )
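A hedged usage sketch: narrow the base query down to a single image and read both its own storage and its parent's storage from the joined aliases. The filter values and `repo` are illustrative assumptions, not part of the excerpt.

query = get_image_with_storage_and_parent_base().where(
    Image.docker_image_id == "abc123",
    Image.repository == repo,  # `repo` assumed from the calling context
)
image = query.get()
print(image.storage.uuid)             # own storage, prefetched by the join
if image.parent is not None:
    print(image.parent.storage.uuid)  # parent storage, prefetched via the aliases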
Example #16
def populate_storage_for_gc():
    """
    Populate FakeStorage with dummy data for each ImageStorage row.
    """
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        content = b"hello world"
        storage.put_content({preferred}, storage.blob_path(storage_row.content_checksum), content)
        assert storage.exists({preferred}, storage.blob_path(storage_row.content_checksum))

    yield
Example #17
def temp_link_blob(repository_id, blob_digest, link_expiration_s):
    """ Temporarily links to the blob record from the given repository. If the blob record is not
      found, return None.
  """
    assert blob_digest

    with db_transaction():
        try:
            storage = ImageStorage.get(content_checksum=blob_digest)
        except ImageStorage.DoesNotExist:
            return None

        _temp_link_blob(repository_id, storage, link_expiration_s)
        return storage
Example #18
def get_repository_blob_by_digest(repository, blob_digest):
    """ Find the content-addressable blob linked to the specified repository.
  """
    assert blob_digest
    try:
        storage = (ImageStorage.select(ImageStorage.uuid).join(Image).where(
            Image.repository == repository,
            ImageStorage.content_checksum == blob_digest,
            ImageStorage.uploading == False).get())

        return storage_model.get_storage_by_uuid(storage.uuid)
    except (ImageStorage.DoesNotExist, InvalidImageException):
        raise BlobDoesNotExist(
            'Blob does not exist with digest: {0}'.format(blob_digest))
Example #19
    def test_connect_existing_blobs_to_new_manifest(self, create_repo):
        repo_ref = create_repo(self.orgname, self.upstream_repository,
                               self.user)
        input_manifest = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(UBI8_8_4_MANIFEST_SCHEMA2),
            DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
        )
        layer = input_manifest.manifest_dict["layers"][0]
        blob = ImageStorage.create(
            image_size=layer["size"],
            uncompressed_size=layer["size"],
            content_checksum=layer["digest"],
        )

        proxy_model = ProxyModel(
            self.orgname,
            self.upstream_repository,
            self.user,
        )
        proxy_model._create_manifest_and_retarget_tag(repo_ref, input_manifest,
                                                      self.tag)
        blob_count = (ImageStorage.select().where(
            ImageStorage.content_checksum == blob.content_checksum).count())
        assert blob_count == 1
Example #20
File: test_gc.py Project: zhill/quay
def store_tag_manifest(namespace, repo_name, tag_name, image_id):
    builder = DockerSchema1ManifestBuilder(namespace, repo_name, tag_name)
    storage_id_map = {}
    try:
        image_storage = ImageStorage.select().where(
            ~(ImageStorage.content_checksum >> None)).get()
        builder.add_layer(image_storage.content_checksum, '{"id": "foo"}')
        storage_id_map[image_storage.content_checksum] = image_storage.id
    except ImageStorage.DoesNotExist:
        pass

    manifest = builder.build(docker_v2_signing_key)
    manifest_row, _ = model.tag.store_tag_manifest_for_testing(
        namespace, repo_name, tag_name, manifest, image_id, storage_id_map)
    return manifest_row
Example #21
File: tag.py Project: zhill/quay
def get_tags_images_eligible_for_scan(clair_version):
    Parent = Image.alias()
    ParentImageStorage = ImageStorage.alias()

    return _tag_alive(
        RepositoryTag.select(Image, ImageStorage, Parent, ParentImageStorage,
                             RepositoryTag).join(
                                 Image, on=(RepositoryTag.image == Image.id)).
        join(ImageStorage,
             on=(Image.storage == ImageStorage.id)).switch(Image).join(
                 Parent, JOIN.LEFT_OUTER, on=(Image.parent == Parent.id)).join(
                     ParentImageStorage,
                     JOIN.LEFT_OUTER,
                     on=(ParentImageStorage.id == Parent.storage)).where(
                         RepositoryTag.hidden == False).where(
                             Image.security_indexed_engine < clair_version))
Example #22
def get_repository_blob_by_digest(repository, blob_digest):
    """ Find the content-addressable blob linked to the specified repository and
      returns it or None if none.
  """
    try:
        storage = (ImageStorage.select(
            ImageStorage.uuid).join(ManifestBlob).where(
                ManifestBlob.repository == repository,
                ImageStorage.content_checksum == blob_digest,
                ImageStorage.uploading == False).get())

        return get_storage_by_uuid(storage.uuid)
    except (ImageStorage.DoesNotExist, InvalidImageException):
        # TODO: Remove once we are no longer using the legacy tables.
        # Try the legacy call.
        try:
            return legacy_get(repository, blob_digest)
        except BlobDoesNotExist:
            return None
Example #23
def get_repo_blob_by_digest(namespace, repo_name, blob_digest):
    """ Find the content-addressable blob linked to the specified repository.
  """
    assert blob_digest
    try:
        storage = (ImageStorage.select(
            ImageStorage.uuid).join(Image).join(Repository).join(
                Namespace,
                on=(Namespace.id == Repository.namespace_user)).where(
                    Repository.name == repo_name,
                    Namespace.username == namespace,
                    ImageStorage.content_checksum == blob_digest,
                    ImageStorage.uploading == False,
                ).get())

        return storage_model.get_storage_by_uuid(storage.uuid)
    except (ImageStorage.DoesNotExist, InvalidImageException):
        raise BlobDoesNotExist(
            "Blob does not exist with digest: {0}".format(blob_digest))
Example #24
    def get_repo_blob_by_digest(self,
                                repository_ref,
                                blob_digest,
                                include_placements=False):
        """
        Returns the blob in the repository with the given digest.

        If the blob is a placeholder, downloads it from the upstream registry.
        Placeholder blobs are blobs that don't yet have an ImageStoragePlacement
        associated with them.

        Note that there may be multiple records in the same repository for the same blob digest, so
        the return value of this function may change.
        """
        blob = self._get_shared_storage(blob_digest)
        if blob is None:
            try:
                blob = (ImageStorage.select().join(ManifestBlob).where(
                    ManifestBlob.repository_id == repository_ref.id,
                    ImageStorage.content_checksum == blob_digest,
                ).get())
            except ImageStorage.DoesNotExist:
                return None

        try:
            ImageStoragePlacement.select().where(
                ImageStoragePlacement.storage == blob).get()
        except ImageStoragePlacement.DoesNotExist:
            try:
                self._download_blob(repository_ref, blob_digest)
            except BlobDigestMismatchException:
                raise UpstreamRegistryError("blob digest mismatch")
            except BlobTooLargeException as e:
                raise UpstreamRegistryError(
                    f"blob too large, max allowed is {e.max_allowed}")
            except BlobRangeMismatchException:
                raise UpstreamRegistryError("range mismatch")
            except BlobUploadException:
                raise UpstreamRegistryError("invalid blob upload")

        return super().get_repo_blob_by_digest(repository_ref, blob_digest,
                                               include_placements)
Example #25
    def done(self):
        """ Marks the manifest builder as complete and disposes of any state. This call is optional
        and it is expected manifest builders will eventually time out if unused for an
        extended period of time.
    """
        temp_storages = self._builder_state.temp_storages
        for storage_id in temp_storages:
            try:
                storage = ImageStorage.get(id=storage_id)
                if storage.uploading and storage.content_checksum != EMPTY_LAYER_BLOB_DIGEST:
                    # Delete all the placements pointing to the storage.
                    ImageStoragePlacement.delete().where(
                        ImageStoragePlacement.storage == storage).execute()

                    # Delete the storage.
                    storage.delete_instance()
            except ImageStorage.DoesNotExist:
                pass

        session.pop(_SESSION_KEY, None)
Example #26
def calculate_image_aggregate_size(ancestors_str, image_size, parent_image):
    ancestors = ancestors_str.split("/")[1:-1]
    if not ancestors:
        return image_size

    if parent_image is None:
        raise DataModelException("Could not load parent image")

    ancestor_size = parent_image.aggregate_size
    if ancestor_size is not None:
        return ancestor_size + image_size

    # Fallback to a slower path if the parent doesn't have an aggregate size saved.
    # TODO: remove this code if/when we do a full backfill.
    ancestor_size = (ImageStorage.select(fn.Sum(ImageStorage.image_size)).join(
        Image).where(Image.id << ancestors).scalar())
    if ancestor_size is None:
        return None

    return ancestor_size + image_size
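A hedged worked example of the ancestry-string convention assumed by the function above: "/17/42/" lists ancestor IDs from root to direct parent, so split("/")[1:-1] yields ["17", "42"]. The IDs and sizes here are illustrative.

parent = Image.get(Image.id == 42)  # hypothetical direct parent row

total = calculate_image_aggregate_size(
    ancestors_str="/17/42/",
    image_size=1024,
    parent_image=parent,
)
# Fast path: parent.aggregate_size + 1024 when the parent carries a cached aggregate;
# otherwise the function sums ImageStorage.image_size over images 17 and 42 and adds 1024.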
Example #27
    def test_create_manifest_config_blob(self, test_name,
                                         proxy_manifest_response):
        test_params = storage_test_cases[test_name]
        repo = f"{self.orgname}/{test_params['image_name']}"
        params = {
            "repository": repo,
            "manifest_ref": test_params["manifest_ref"],
        }
        proxy_mock = proxy_manifest_response(
            test_params["manifest_ref"],
            test_params["manifest_json"],
            test_params["manifest_type"],
        )
        with patch("data.registry_model.registry_proxy_model.Proxy",
                   MagicMock(return_value=proxy_mock)):
            headers = _get_auth_headers(self.sub, self.ctx, repo)
            headers["Accept"] = ", ".join(
                DOCKER_SCHEMA2_CONTENT_TYPES.union(OCI_CONTENT_TYPES).union(
                    DOCKER_SCHEMA1_CONTENT_TYPES))
            resp = conduct_call(
                self.client,
                test_params["view_name"],
                url_for,
                "GET",
                params,
                expected_code=200,
                headers=headers,
            )

        manifest = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(test_params["manifest_json"]),
            test_params["manifest_type"],
            sparse_manifest_support=True,
        )
        if manifest.schema_version == 2 and not manifest.is_manifest_list:
            q = ImageStorage.filter(
                ImageStorage.content_checksum == manifest.config.digest)
            assert q.count() == 1
Example #28
def assert_gc_integrity(expect_storage_removed=True):
    """
    Specialized assertion for ensuring that GC cleans up all dangling storages and labels, invokes
    the callback for images removed and doesn't invoke the callback for images *not* removed.
    """

    # Add a callback for when images are removed.
    removed_image_storages = []
    remove_callback = model.config.register_image_cleanup_callback(
        removed_image_storages.extend)

    # Store existing storages. We won't verify these for existence because they
    # were likely created as test data.
    existing_digests = set()
    for storage_row in ImageStorage.select():
        if storage_row.cas_path:
            existing_digests.add(storage_row.content_checksum)

    for blob_row in ApprBlob.select():
        existing_digests.add(blob_row.digest)

    # Store the number of dangling objects.
    existing_storage_count = _get_dangling_storage_count()
    existing_label_count = _get_dangling_label_count()
    existing_manifest_count = _get_dangling_manifest_count()

    # Yield to the GC test.
    with check_transitive_modifications():
        try:
            yield
        finally:
            remove_callback()

    # Ensure the number of dangling storages, manifests and labels has not changed.
    updated_storage_count = _get_dangling_storage_count()
    assert updated_storage_count == existing_storage_count

    updated_label_count = _get_dangling_label_count()
    assert updated_label_count == existing_label_count, _get_dangling_labels()

    updated_manifest_count = _get_dangling_manifest_count()
    assert updated_manifest_count == existing_manifest_count

    # Ensure that for each call to the image+storage cleanup callback, the image and its
    # storage is not found *anywhere* in the database.
    for removed_image_and_storage in removed_image_storages:
        assert isinstance(removed_image_and_storage, Image)

        try:
            # NOTE: SQLite can and will reuse AUTOINCREMENT IDs occasionally, so if we find a row
            # with the same ID, make sure it does not have the same Docker Image ID.
            # See: https://www.sqlite.org/autoinc.html
            found_image = Image.get(id=removed_image_and_storage.id)
            assert (found_image.docker_image_id !=
                    removed_image_and_storage.docker_image_id
                    ), "Found unexpected removed image %s under repo %s" % (
                        found_image.id,
                        found_image.repository,
                    )
        except Image.DoesNotExist:
            pass

        # Ensure that image storages are only removed if not shared.
        shared = Image.select().where(
            Image.storage == removed_image_and_storage.storage_id).count()
        if shared == 0:
            shared = (ManifestBlob.select().where(
                ManifestBlob.blob ==
                removed_image_and_storage.storage_id).count())

        if shared == 0:
            shared = (UploadedBlob.select().where(
                UploadedBlob.blob ==
                removed_image_and_storage.storage_id).count())

        if shared == 0:
            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(id=removed_image_and_storage.storage_id)

            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(uuid=removed_image_and_storage.storage.uuid)

    # Ensure all CAS storage is in the storage engine.
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        if storage_row.content_checksum in existing_digests:
            continue

        if storage_row.cas_path:
            storage.get_content({preferred},
                                storage.blob_path(
                                    storage_row.content_checksum))

    for blob_row in ApprBlob.select():
        if blob_row.digest in existing_digests:
            continue

        storage.get_content({preferred}, storage.blob_path(blob_row.digest))

    # Ensure all tags have valid manifests.
    for manifest in {t.manifest for t in Tag.select()}:
        # Ensure that the manifest's blobs all exist.
        found_blobs = {
            b.blob.content_checksum
            for b in ManifestBlob.select().where(
                ManifestBlob.manifest == manifest)
        }

        parsed = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(manifest.manifest_bytes),
            manifest.media_type.name)
        assert set(parsed.local_blob_digests) == found_blobs
Example #29
File: test_gc.py Project: zhill/quay
def assert_gc_integrity(expect_storage_removed=True, check_oci_tags=True):
    """ Specialized assertion for ensuring that GC cleans up all dangling storages
      and labels, invokes the callback for images removed and doesn't invoke the
      callback for images *not* removed.
  """
    # Add a callback for when images are removed.
    removed_image_storages = []
    model.config.register_image_cleanup_callback(removed_image_storages.extend)

    # Store the number of dangling storages and labels.
    existing_storage_count = _get_dangling_storage_count()
    existing_label_count = _get_dangling_label_count()
    existing_manifest_count = _get_dangling_manifest_count()
    yield

    # Ensure the number of dangling storages, manifests and labels has not changed.
    updated_storage_count = _get_dangling_storage_count()
    assert updated_storage_count == existing_storage_count

    updated_label_count = _get_dangling_label_count()
    assert updated_label_count == existing_label_count, _get_dangling_labels()

    updated_manifest_count = _get_dangling_manifest_count()
    assert updated_manifest_count == existing_manifest_count

    # Ensure that for each call to the image+storage cleanup callback, the image and its
    # storage is not found *anywhere* in the database.
    for removed_image_and_storage in removed_image_storages:
        with pytest.raises(Image.DoesNotExist):
            Image.get(id=removed_image_and_storage.id)

        # Ensure that image storages are only removed if not shared.
        shared = Image.select().where(
            Image.storage == removed_image_and_storage.storage_id).count()
        if shared == 0:
            shared = (ManifestBlob.select().where(
                ManifestBlob.blob ==
                removed_image_and_storage.storage_id).count())

        if shared == 0:
            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(id=removed_image_and_storage.storage_id)

            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(uuid=removed_image_and_storage.storage.uuid)

    # Ensure all CAS storage is in the storage engine.
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        if storage_row.cas_path:
            storage.get_content({preferred},
                                storage.blob_path(
                                    storage_row.content_checksum))

    for blob_row in ApprBlob.select():
        storage.get_content({preferred}, storage.blob_path(blob_row.digest))

    # Ensure there are no dangling OCI tags.
    if check_oci_tags:
        oci_tags = {t.id for t in Tag.select()}
        referenced_oci_tags = {t.tag_id for t in TagToRepositoryTag.select()}
        assert not oci_tags - referenced_oci_tags

    # Ensure all tags have valid manifests.
    for manifest in {t.manifest for t in Tag.select()}:
        # Ensure that the manifest's blobs all exist.
        found_blobs = {
            b.blob.content_checksum
            for b in ManifestBlob.select().where(
                ManifestBlob.manifest == manifest)
        }

        parsed = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(manifest.manifest_bytes),
            manifest.media_type.name)
        assert set(parsed.local_blob_digests) == found_blobs
Example #30
def repeated_storages():
    storages = list(ImageStorage.select().where(~(ImageStorage.content_checksum >> None)).limit(5))
    return storages + storages