예제 #1
0
파일: storage.py 프로젝트: sabre1041/quay-1
def _is_storage_orphaned(candidate_id):
    """
    Returns the whether the given candidate storage ID is orphaned. Must be executed
    under a transaction.
    """
    with ensure_under_transaction():
        try:
            ManifestBlob.get(blob=candidate_id)
            return False
        except ManifestBlob.DoesNotExist:
            pass

        try:
            Image.get(storage=candidate_id)
            return False
        except Image.DoesNotExist:
            pass

        try:
            UploadedBlob.get(blob=candidate_id)
            return False
        except UploadedBlob.DoesNotExist:
            pass

    return True
예제 #2
0
def test_purge_repository_storage_blob(default_tag_policy, initialized_db):
    with populate_storage_for_gc():
        expected_blobs_removed_from_storage = set()
        preferred = storage.preferred_locations[0]

        # Check that existing uploadedblobs has an object in storage
        for repo in database.Repository.select().order_by(database.Repository.id):
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                assert storage.exists(
                    {preferred}, storage.blob_path(uploadedblob.blob.content_checksum)
                )

        # Remove eveyrhing
        for repo in database.Repository.select():  # .order_by(database.Repository.id):
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                # Check if only this repository is referencing the uploadedblob
                # If so, the blob should be removed from storage
                has_depedent_manifestblob = (
                    ManifestBlob.select()
                    .where(
                        ManifestBlob.blob == uploadedblob.blob,
                        ManifestBlob.repository != repo,
                    )
                    .count()
                )
                has_dependent_image = (
                    Image.select()
                    .where(
                        Image.storage == uploadedblob.blob,
                        Image.repository != repo,
                    )
                    .count()
                )
                has_dependent_uploadedblobs = (
                    UploadedBlob.select()
                    .where(
                        UploadedBlob == uploadedblob,
                        UploadedBlob.repository != repo,
                    )
                    .count()
                )

                if (
                    not has_depedent_manifestblob
                    and not has_dependent_image
                    and not has_dependent_uploadedblobs
                ):
                    expected_blobs_removed_from_storage.add(uploadedblob.blob)

            assert model.gc.purge_repository(repo, force=True)

        for removed_blob_from_storage in expected_blobs_removed_from_storage:
            assert not storage.exists(
                {preferred}, storage.blob_path(removed_blob_from_storage.content_checksum)
            )
예제 #3
0
def _get_dangling_storage_count():
    storage_ids = set([current.id for current in ImageStorage.select()])
    referenced_by_image = set([image.storage_id for image in Image.select()])
    referenced_by_manifest = set(
        [blob.blob_id for blob in ManifestBlob.select()])
    referenced_by_uploaded = set(
        [upload.blob_id for upload in UploadedBlob.select()])
    referenced_by_derived_image = set(
        [derived.derivative_id for derived in DerivedStorageForImage.select()])
    return len(storage_ids - referenced_by_image -
               referenced_by_derived_image - referenced_by_manifest -
               referenced_by_uploaded)
예제 #4
0
def _temp_link_blob(repository_id, storage, link_expiration_s):
    """ Note: Should *always* be called by a parent under a transaction. """
    try:
        repository = Repository.get(id=repository_id)
    except Repository.DoesNotExist:
        return None

    if repository.state == RepositoryState.MARKED_FOR_DELETION:
        return None

    return UploadedBlob.create(
        repository=repository_id,
        blob=storage,
        expires_at=datetime.utcnow() + timedelta(seconds=link_expiration_s),
    )
예제 #5
0
def test_garbage_collect_storage(default_tag_policy, initialized_db):
    with populate_storage_for_gc():
        preferred = storage.preferred_locations[0]

        # Get a random sample of storages
        uploadedblobs = list(UploadedBlob.select())
        random_uploadedblobs = random.sample(
            uploadedblobs, random.randrange(1, len(uploadedblobs) + 1)
        )
        model.storage.garbage_collect_storage([b.blob.id for b in random_uploadedblobs])
        # Ensure that the blobs' storage weren't removed, since we didn't GC anything
        for uploadedblob in random_uploadedblobs:
            assert storage.exists(
                {preferred}, storage.blob_path(uploadedblob.blob.content_checksum)
            )
예제 #6
0
def lookup_expired_uploaded_blobs(repository):
    """ Looks up all expired uploaded blobs in a repository. """
    return UploadedBlob.select().where(
        UploadedBlob.repository == repository,
        UploadedBlob.expires_at <= datetime.utcnow())
예제 #7
0
def _delete_temp_links(repo):
    """ Deletes any temp links to blobs. """
    UploadedBlob.delete().where(UploadedBlob.repository == repo).execute()
예제 #8
0
def assert_gc_integrity(expect_storage_removed=True):
    """
    Specialized assertion for ensuring that GC cleans up all dangling storages and labels, invokes
    the callback for images removed and doesn't invoke the callback for images *not* removed.
    """

    # Add a callback for when images are removed.
    removed_image_storages = []
    remove_callback = model.config.register_image_cleanup_callback(
        removed_image_storages.extend)

    # Store existing storages. We won't verify these for existence because they
    # were likely created as test data.
    existing_digests = set()
    for storage_row in ImageStorage.select():
        if storage_row.cas_path:
            existing_digests.add(storage_row.content_checksum)

    for blob_row in ApprBlob.select():
        existing_digests.add(blob_row.digest)

    # Store the number of dangling objects.
    existing_storage_count = _get_dangling_storage_count()
    existing_label_count = _get_dangling_label_count()
    existing_manifest_count = _get_dangling_manifest_count()

    # Yield to the GC test.
    with check_transitive_modifications():
        try:
            yield
        finally:
            remove_callback()

    # Ensure the number of dangling storages, manifests and labels has not changed.
    updated_storage_count = _get_dangling_storage_count()
    assert updated_storage_count == existing_storage_count

    updated_label_count = _get_dangling_label_count()
    assert updated_label_count == existing_label_count, _get_dangling_labels()

    updated_manifest_count = _get_dangling_manifest_count()
    assert updated_manifest_count == existing_manifest_count

    # Ensure that for each call to the image+storage cleanup callback, the image and its
    # storage is not found *anywhere* in the database.
    for removed_image_and_storage in removed_image_storages:
        assert isinstance(removed_image_and_storage, Image)

        try:
            # NOTE: SQLite can and will reuse AUTOINCREMENT IDs occasionally, so if we find a row
            # with the same ID, make sure it does not have the same Docker Image ID.
            # See: https://www.sqlite.org/autoinc.html
            found_image = Image.get(id=removed_image_and_storage.id)
            assert (found_image.docker_image_id !=
                    removed_image_and_storage.docker_image_id
                    ), "Found unexpected removed image %s under repo %s" % (
                        found_image.id,
                        found_image.repository,
                    )
        except Image.DoesNotExist:
            pass

        # Ensure that image storages are only removed if not shared.
        shared = Image.select().where(
            Image.storage == removed_image_and_storage.storage_id).count()
        if shared == 0:
            shared = (ManifestBlob.select().where(
                ManifestBlob.blob ==
                removed_image_and_storage.storage_id).count())

        if shared == 0:
            shared = (UploadedBlob.select().where(
                UploadedBlob.blob ==
                removed_image_and_storage.storage_id).count())

        if shared == 0:
            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(id=removed_image_and_storage.storage_id)

            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(uuid=removed_image_and_storage.storage.uuid)

    # Ensure all CAS storage is in the storage engine.
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        if storage_row.content_checksum in existing_digests:
            continue

        if storage_row.cas_path:
            storage.get_content({preferred},
                                storage.blob_path(
                                    storage_row.content_checksum))

    for blob_row in ApprBlob.select():
        if blob_row.digest in existing_digests:
            continue

        storage.get_content({preferred}, storage.blob_path(blob_row.digest))

    # Ensure all tags have valid manifests.
    for manifest in {t.manifest for t in Tag.select()}:
        # Ensure that the manifest's blobs all exist.
        found_blobs = {
            b.blob.content_checksum
            for b in ManifestBlob.select().where(
                ManifestBlob.manifest == manifest)
        }

        parsed = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(manifest.manifest_bytes),
            manifest.media_type.name)
        assert set(parsed.local_blob_digests) == found_blobs
예제 #9
0
파일: gc.py 프로젝트: kleesc/quay
def purge_repository(repo, force=False):
    """
    Completely delete all traces of the repository.

    Will return True upon complete success, and False upon partial or total failure. Garbage
    collection is incremental and repeatable, so this return value does not need to be checked or
    responded to.
    """
    assert repo.state == RepositoryState.MARKED_FOR_DELETION or force

    # Update the repo state to ensure nothing else is written to it.
    repo.state = RepositoryState.MARKED_FOR_DELETION
    repo.save()

    # Delete the repository of all Appr-referenced entries.
    # Note that new-model Tag's must be deleted in *two* passes, as they can reference parent tags,
    # and MySQL is... particular... about such relationships when deleting.
    if repo.kind.name == "application":
        fst_pass = (ApprTag.delete().where(
            ApprTag.repository == repo,
            ~(ApprTag.linked_tag >> None)).execute())
        snd_pass = ApprTag.delete().where(ApprTag.repository == repo).execute()
        gc_table_rows_deleted.labels(table="ApprTag").inc(fst_pass + snd_pass)
    else:
        # GC to remove the images and storage.
        _purge_repository_contents(repo)

    # Ensure there are no additional tags, manifests, images or blobs in the repository.
    assert ApprTag.select().where(ApprTag.repository == repo).count() == 0
    assert Tag.select().where(Tag.repository == repo).count() == 0
    assert RepositoryTag.select().where(
        RepositoryTag.repository == repo).count() == 0
    assert Manifest.select().where(Manifest.repository == repo).count() == 0
    assert ManifestBlob.select().where(
        ManifestBlob.repository == repo).count() == 0
    assert UploadedBlob.select().where(
        UploadedBlob.repository == repo).count() == 0
    assert (ManifestSecurityStatus.select().where(
        ManifestSecurityStatus.repository == repo).count() == 0)
    assert Image.select().where(Image.repository == repo).count() == 0

    # Delete any repository build triggers, builds, and any other large-ish reference tables for
    # the repository.
    _chunk_delete_all(repo, RepositoryPermission, force=force)
    _chunk_delete_all(repo, RepositoryBuild, force=force)
    _chunk_delete_all(repo, RepositoryBuildTrigger, force=force)
    _chunk_delete_all(repo, RepositoryActionCount, force=force)
    _chunk_delete_all(repo, Star, force=force)
    _chunk_delete_all(repo, AccessToken, force=force)
    _chunk_delete_all(repo, RepositoryNotification, force=force)
    _chunk_delete_all(repo, BlobUpload, force=force)
    _chunk_delete_all(repo, RepoMirrorConfig, force=force)
    _chunk_delete_all(repo, RepositoryAuthorizedEmail, force=force)

    # Delete any marker rows for the repository.
    DeletedRepository.delete().where(
        DeletedRepository.repository == repo).execute()

    # Delete the rest of the repository metadata.
    try:
        # Make sure the repository still exists.
        fetched = Repository.get(id=repo.id)
    except Repository.DoesNotExist:
        return False

    try:
        fetched.delete_instance(recursive=True,
                                delete_nullable=False,
                                force=force)
        gc_repos_purged.inc()
        return True
    except IntegrityError:
        return False
예제 #10
0
파일: gc.py 프로젝트: kleesc/quay
def _purge_repository_contents(repo):
    """
    Purges all the contents of a repository, removing all of its tags, manifests and images.
    """
    logger.debug("Purging repository %s", repo)

    # Purge via all the tags.
    while True:
        found = False
        for tags in _chunk_iterate_for_deletion(
                Tag.select().where(Tag.repository == repo)):
            logger.debug("Found %s tags to GC under repository %s", len(tags),
                         repo)
            found = True
            context = _GarbageCollectorContext(repo)
            for tag in tags:
                logger.debug("Deleting tag %s under repository %s", tag, repo)
                assert tag.repository_id == repo.id
                _purge_oci_tag(tag, context, allow_non_expired=True)

            _run_garbage_collection(context)

        if not found:
            break

    # Purge any uploaded blobs that have expired.
    while True:
        found = False
        for uploaded_blobs in _chunk_iterate_for_deletion(
                UploadedBlob.select().where(UploadedBlob.repository == repo)):
            logger.debug("Found %s uploaded blobs to GC under repository %s",
                         len(uploaded_blobs), repo)
            found = True
            context = _GarbageCollectorContext(repo)
            for uploaded_blob in uploaded_blobs:
                logger.debug("Deleting uploaded blob %s under repository %s",
                             uploaded_blob, repo)
                assert uploaded_blob.repository_id == repo.id
                _purge_uploaded_blob(uploaded_blob,
                                     context,
                                     allow_non_expired=True)

            _run_garbage_collection(context)

        if not found:
            break

    # TODO: remove this once we've removed the foreign key constraints from RepositoryTag
    # and Image.
    while True:
        found = False
        repo_tag_query = RepositoryTag.select().where(
            RepositoryTag.repository == repo)
        for tags in _chunk_iterate_for_deletion(repo_tag_query):
            logger.debug("Found %s tags to GC under repository %s", len(tags),
                         repo)
            found = True
            context = _GarbageCollectorContext(repo)

            for tag in tags:
                logger.debug("Deleting tag %s under repository %s", tag, repo)
                assert tag.repository_id == repo.id
                _purge_pre_oci_tag(tag, context, allow_non_expired=True)

            _run_garbage_collection(context)

        if not found:
            break

    assert Tag.select().where(Tag.repository == repo).count() == 0
    assert RepositoryTag.select().where(
        RepositoryTag.repository == repo).count() == 0
    assert Manifest.select().where(Manifest.repository == repo).count() == 0
    assert ManifestBlob.select().where(
        ManifestBlob.repository == repo).count() == 0
    assert UploadedBlob.select().where(
        UploadedBlob.repository == repo).count() == 0

    # Add all remaining images to a new context. We do this here to minimize the number of images
    # we need to load.
    while True:
        found_image = False
        image_context = _GarbageCollectorContext(repo)

        existing_count = Image.select().where(Image.repository == repo).count()
        if not existing_count:
            break

        for image in Image.select().where(Image.repository == repo):
            found_image = True
            logger.debug("Trying to delete image %s under repository %s",
                         image, repo)
            assert image.repository_id == repo.id
            image_context.add_legacy_image_id(image.id)

        _run_garbage_collection(image_context)
        new_count = Image.select().where(Image.repository == repo).count()
        if new_count >= existing_count:
            raise Exception("GC purge bug! Please report this to support!")