Ejemplo n.º 1
0
Archivo: gc.py Proyecto: kleesc/quay
def _purge_oci_tag(tag, context, allow_non_expired=False):
    """
    Delete an OCI ``Tag`` row along with the ``TagToRepositoryTag`` rows that map to it.

    The tag's manifest ID is registered on ``context`` (``add_manifest_id``) before any row
    is deleted. Unless ``allow_non_expired`` is truthy, the tag is asserted to carry a
    ``lifetime_end_ms`` that is no later than ``oci_tag.get_epoch_timestamp_ms()``.

    Returns False, without deleting anything, when the tag row can no longer be found or
    when its ``lifetime_end_ms`` differs from the value held by ``tag``. After a successful
    delete there is no explicit return statement, so the result is None.
    """
    assert tag.repository_id == context.repository.id

    if not allow_non_expired:
        # By default only a tag whose lifetime has already ended may be purged.
        assert tag.lifetime_end_ms is not None
        assert tag.lifetime_end_ms <= oci_tag.get_epoch_timestamp_ms()

    # Queue the tag's manifest on the context so it is considered for collection.
    context.add_manifest_id(tag.manifest_id)

    with db_transaction():
        # Re-read the row through db_for_update and make sure it still matches what the
        # caller handed us before removing anything.
        reload_query = db_for_update(Tag.select().where(Tag.id == tag.id))
        try:
            current = reload_query.get()
        except Tag.DoesNotExist:
            return False

        assert current.id == tag.id
        assert current.repository_id == context.repository.id
        if current.lifetime_end_ms != tag.lifetime_end_ms:
            return False

        # Mapping rows go first, then the tag row itself.
        mapping_delete = TagToRepositoryTag.delete().where(
            TagToRepositoryTag.tag == tag)
        mapping_rows_deleted = mapping_delete.execute()

        tag.delete_instance()

    # Record the deletions in the metrics.
    gc_table_rows_deleted.labels(table="Tag").inc()
    gc_table_rows_deleted.labels(
        table="TagToRepositoryTag").inc(mapping_rows_deleted)
Ejemplo n.º 2
0
Archivo: gc.py Proyecto: kleesc/quay
def _garbage_collect_legacy_image(legacy_image_id, context):
    assert legacy_image_id is not None

    # Check if the image is referenced.
    if _check_image_used(legacy_image_id):
        return False

    # We have an unreferenced image. We can now delete it.
    # Grab any derived storage for the image.
    for derived in DerivedStorageForImage.select().where(
            DerivedStorageForImage.source_image == legacy_image_id):
        context.add_blob_id(derived.derivative_id)

    try:
        image = Image.select().where(Image.id == legacy_image_id).get()
    except Image.DoesNotExist:
        return False

    assert image.repository_id == context.repository.id

    # Add the image's blob to be GCed.
    context.add_blob_id(image.storage_id)

    # If the image has a parent ID, add the parent for GC.
    if image.parent_id is not None:
        context.add_legacy_image_id(image.parent_id)

    # Delete the image.
    with db_transaction():
        if _check_image_used(legacy_image_id):
            return False

        try:
            image = Image.select().where(Image.id == legacy_image_id).get()
        except Image.DoesNotExist:
            return False

        assert image.id == legacy_image_id
        assert image.repository_id == context.repository.id

        # Delete any derived storage for the image.
        deleted_derived_storage = (DerivedStorageForImage.delete().where(
            DerivedStorageForImage.source_image == legacy_image_id).execute())

        # Delete the image itself.
        image.delete_instance()

    context.mark_legacy_image_removed(image)

    gc_table_rows_deleted.labels(table="Image").inc()
    gc_table_rows_deleted.labels(
        table="DerivedStorageForImage").inc(deleted_derived_storage)

    if config.image_cleanup_callbacks:
        for callback in config.image_cleanup_callbacks:
            callback([image])

    return True
Ejemplo n.º 3
0
Archivo: gc.py Proyecto: kleesc/quay
def _purge_uploaded_blob(uploaded_blob, context, allow_non_expired=False):
    """
    Delete an uploaded-blob row and queue the blob it points at on ``context``.

    Unless ``allow_non_expired`` is truthy, the row's ``expires_at`` is asserted to be no
    later than ``datetime.utcnow()``. Nothing is returned.
    """
    if not allow_non_expired:
        assert uploaded_blob.expires_at <= datetime.utcnow()

    # Queue the referenced storage so it gets checked.
    blob_id = uploaded_blob.blob_id
    context.add_blob_id(blob_id)

    # Remove the row and record the deletion.
    uploaded_blob.delete_instance()
    deleted_counter = gc_table_rows_deleted.labels(table="UploadedBlob")
    deleted_counter.inc()
Ejemplo n.º 4
0
Archivo: gc.py Proyecto: kleesc/quay
def _garbage_collect_label(label_id, context):
    assert label_id is not None

    # We can now delete the label.
    with db_transaction():
        if _check_label_used(label_id):
            return False

        result = Label.delete().where(Label.id == label_id).execute() == 1

    if result:
        context.mark_label_id_removed(label_id)
        gc_table_rows_deleted.labels(table="Label").inc(result)

    return result
Ejemplo n.º 5
0
Archivo: gc.py Proyecto: kleesc/quay
def _garbage_collect_legacy_manifest(legacy_manifest_id, context):
    assert legacy_manifest_id is not None

    # Add the labels to be GCed.
    query = TagManifestLabel.select().where(
        TagManifestLabel.annotated == legacy_manifest_id)
    for manifest_label in query:
        context.add_label_id(manifest_label.label_id)

    # Delete the tag manifest.
    with db_transaction():
        try:
            tag_manifest = TagManifest.select().where(
                TagManifest.id == legacy_manifest_id).get()
        except TagManifest.DoesNotExist:
            return False

        assert tag_manifest.id == legacy_manifest_id
        assert tag_manifest.tag.repository_id == context.repository.id

        # Delete any label mapping rows.
        (TagManifestLabelMap.delete().where(
            TagManifestLabelMap.tag_manifest == legacy_manifest_id).execute())

        # Delete the label rows.
        TagManifestLabel.delete().where(
            TagManifestLabel.annotated == legacy_manifest_id).execute()

        # Delete the mapping row if it exists.
        try:
            tmt = (TagManifestToManifest.select().where(
                TagManifestToManifest.tag_manifest == tag_manifest).get())
            context.add_manifest_id(tmt.manifest_id)
            tmt_deleted = tmt.delete_instance()
            if tmt_deleted:
                gc_table_rows_deleted.labels(
                    table="TagManifestToManifest").inc()
        except TagManifestToManifest.DoesNotExist:
            pass

        # Delete the tag manifest.
        tag_manifest_deleted = tag_manifest.delete_instance()
        if tag_manifest_deleted:
            gc_table_rows_deleted.labels(table="TagManifest").inc()
    return True
Ejemplo n.º 6
0
Archivo: gc.py Proyecto: kleesc/quay
def _purge_pre_oci_tag(tag, context, allow_non_expired=False):
    """
    Delete a pre-OCI ``RepositoryTag`` row along with the ``TagToRepositoryTag`` rows that
    map to it.

    If a ``TagManifest`` exists for the tag it is collected first through
    ``_garbage_collect_legacy_manifest``, and the tag's image ID is queued on ``context``.
    Unless ``allow_non_expired`` is truthy, the tag is asserted to carry a
    ``lifetime_end_ts`` that is no later than ``pre_oci_tag.get_epoch_timestamp()``.

    Returns False, without deleting the tag, when the tag row can no longer be found or
    when its ``lifetime_end_ts`` differs from the value held by ``tag``. After a successful
    delete there is no explicit return statement, so the result is None.
    """
    assert tag.repository_id == context.repository.id

    if not allow_non_expired:
        # By default only a tag whose lifetime has already ended may be purged.
        assert tag.lifetime_end_ts is not None
        assert tag.lifetime_end_ts <= pre_oci_tag.get_epoch_timestamp()

    # Collect the tag's manifest, when it has one.
    try:
        legacy_manifest = TagManifest.select().where(
            TagManifest.tag == tag).get()
        _garbage_collect_legacy_manifest(legacy_manifest.id, context)
    except TagManifest.DoesNotExist:
        pass

    # Queue the tag's legacy image on the context.
    context.add_legacy_image_id(tag.image_id)

    with db_transaction():
        # Re-read the row through db_for_update and make sure it still matches what the
        # caller handed us before removing anything.
        reload_query = db_for_update(
            RepositoryTag.select().where(RepositoryTag.id == tag.id))
        try:
            current = reload_query.get()
        except RepositoryTag.DoesNotExist:
            return False

        assert current.id == tag.id
        assert current.repository_id == context.repository.id
        if current.lifetime_end_ts != tag.lifetime_end_ts:
            return False

        # Mapping rows go first, then the tag row itself.
        mapping_delete = TagToRepositoryTag.delete().where(
            TagToRepositoryTag.repository_tag == current)
        mapping_rows_deleted = mapping_delete.execute()

        current.delete_instance()

    # Record the deletions in the metrics.
    gc_table_rows_deleted.labels(table="RepositoryTag").inc()
    gc_table_rows_deleted.labels(
        table="TagToRepositoryTag").inc(mapping_rows_deleted)
Ejemplo n.º 7
0
Archivo: gc.py Proyecto: kleesc/quay
def purge_repository(repo, force=False):
    """
    Remove a repository and everything stored under it.

    The repository must already be marked for deletion unless ``force`` is set; either way
    its state is (re)written as ``MARKED_FOR_DELETION`` before anything is removed.

    Returns True when the repository row was deleted. Returns False when the repository row
    no longer exists at the final step or when the final delete raises ``IntegrityError``.
    Garbage collection is incremental and repeatable, so callers do not need to check or act
    on the return value.
    """
    assert repo.state == RepositoryState.MARKED_FOR_DELETION or force

    def _has_no_rows(model):
        # True when `model` holds no rows that point at this repository.
        return model.select().where(model.repository == repo).count() == 0

    # Persist the deletion marker so that nothing else is written to the repository.
    repo.state = RepositoryState.MARKED_FOR_DELETION
    repo.save()

    if repo.kind.name != "application":
        # Regular repository: GC to remove the images and storage.
        _purge_repository_contents(repo)
    else:
        # Application repository: remove the Appr-referenced entries. New-model tags are
        # deleted in *two* passes because they can reference parent tags, and MySQL is
        # particular about such relationships when deleting.
        linked_rows = ApprTag.delete().where(
            ApprTag.repository == repo,
            ~(ApprTag.linked_tag >> None)).execute()
        remaining_rows = ApprTag.delete().where(
            ApprTag.repository == repo).execute()
        gc_table_rows_deleted.labels(
            table="ApprTag").inc(linked_rows + remaining_rows)

    # No tags, manifests, images or blobs may be left in the repository at this point.
    assert _has_no_rows(ApprTag)
    assert _has_no_rows(Tag)
    assert _has_no_rows(RepositoryTag)
    assert _has_no_rows(Manifest)
    assert _has_no_rows(ManifestBlob)
    assert _has_no_rows(UploadedBlob)
    assert _has_no_rows(ManifestSecurityStatus)
    assert _has_no_rows(Image)

    # Remove build triggers, builds and the other large-ish reference tables, in this order.
    reference_models = (
        RepositoryPermission,
        RepositoryBuild,
        RepositoryBuildTrigger,
        RepositoryActionCount,
        Star,
        AccessToken,
        RepositoryNotification,
        BlobUpload,
        RepoMirrorConfig,
        RepositoryAuthorizedEmail,
    )
    for reference_model in reference_models:
        _chunk_delete_all(repo, reference_model, force=force)

    # Remove any marker rows for the repository.
    marker_delete = DeletedRepository.delete().where(
        DeletedRepository.repository == repo)
    marker_delete.execute()

    # Remove the remaining repository metadata, provided the repository row still exists.
    try:
        fetched = Repository.get(id=repo.id)
    except Repository.DoesNotExist:
        return False

    try:
        fetched.delete_instance(
            recursive=True, delete_nullable=False, force=force)
        gc_repos_purged.inc()
    except IntegrityError:
        return False

    return True
Ejemplo n.º 8
0
Archivo: gc.py Proyecto: kleesc/quay
def _garbage_collect_manifest(manifest_id, context):
    assert manifest_id is not None

    # Make sure the manifest isn't referenced.
    if _check_manifest_used(manifest_id):
        return False

    # Add the manifest's blobs to the context to be GCed.
    for manifest_blob in ManifestBlob.select().where(
            ManifestBlob.manifest == manifest_id):
        context.add_blob_id(manifest_blob.blob_id)

    # Retrieve the manifest's associated image, if any.
    try:
        legacy_image_id = ManifestLegacyImage.get(
            manifest=manifest_id).image_id
        context.add_legacy_image_id(legacy_image_id)
    except ManifestLegacyImage.DoesNotExist:
        legacy_image_id = None

    # Add child manifests to be GCed.
    for connector in ManifestChild.select().where(
            ManifestChild.manifest == manifest_id):
        context.add_manifest_id(connector.child_manifest_id)

    # Add the labels to be GCed.
    for manifest_label in ManifestLabel.select().where(
            ManifestLabel.manifest == manifest_id):
        context.add_label_id(manifest_label.label_id)

    # Delete the manifest.
    with db_transaction():
        try:
            manifest = Manifest.select().where(
                Manifest.id == manifest_id).get()
        except Manifest.DoesNotExist:
            return False

        assert manifest.id == manifest_id
        assert manifest.repository_id == context.repository.id
        if _check_manifest_used(manifest_id):
            return False

        # Delete any label mappings.
        deleted_tag_manifest_label_map = (TagManifestLabelMap.delete().where(
            TagManifestLabelMap.manifest == manifest_id).execute())

        # Delete any mapping rows for the manifest.
        deleted_tag_manifest_to_manifest = (
            TagManifestToManifest.delete().where(
                TagManifestToManifest.manifest == manifest_id).execute())

        # Delete any label rows.
        deleted_manifest_label = (ManifestLabel.delete().where(
            ManifestLabel.manifest == manifest_id,
            ManifestLabel.repository == context.repository,
        ).execute())

        # Delete any child manifest rows.
        deleted_manifest_child = (ManifestChild.delete().where(
            ManifestChild.manifest == manifest_id,
            ManifestChild.repository == context.repository,
        ).execute())

        # Delete the manifest blobs for the manifest.
        deleted_manifest_blob = (ManifestBlob.delete().where(
            ManifestBlob.manifest == manifest_id,
            ManifestBlob.repository == context.repository).execute())

        # Delete the security status for the manifest
        deleted_manifest_security = (ManifestSecurityStatus.delete().where(
            ManifestSecurityStatus.manifest == manifest_id,
            ManifestSecurityStatus.repository == context.repository,
        ).execute())

        # Delete the manifest legacy image row.
        deleted_manifest_legacy_image = 0
        if legacy_image_id:
            deleted_manifest_legacy_image = (
                ManifestLegacyImage.delete().where(
                    ManifestLegacyImage.manifest == manifest_id,
                    ManifestLegacyImage.repository == context.repository,
                ).execute())

        # Delete the manifest.
        manifest.delete_instance()

    context.mark_manifest_removed(manifest)

    gc_table_rows_deleted.labels(
        table="TagManifestLabelMap").inc(deleted_tag_manifest_label_map)
    gc_table_rows_deleted.labels(
        table="TagManifestToManifest").inc(deleted_tag_manifest_to_manifest)
    gc_table_rows_deleted.labels(
        table="ManifestLabel").inc(deleted_manifest_label)
    gc_table_rows_deleted.labels(
        table="ManifestChild").inc(deleted_manifest_child)
    gc_table_rows_deleted.labels(
        table="ManifestBlob").inc(deleted_manifest_blob)
    gc_table_rows_deleted.labels(
        table="ManifestSecurityStatus").inc(deleted_manifest_security)
    if deleted_manifest_legacy_image:
        gc_table_rows_deleted.labels(
            table="ManifestLegacyImage").inc(deleted_manifest_legacy_image)

    gc_table_rows_deleted.labels(table="Manifest").inc()

    return True
Ejemplo n.º 9
0
def garbage_collect_storage(storage_id_whitelist):
    """
    Garbage collect the candidate storages whose IDs are given.

    Every candidate is checked with ``_is_storage_orphaned``. For an orphaned storage, its
    placement, torrent-info, signature and storage rows are deleted in one transaction.
    Once all candidates have been processed, the backing paths that are no longer
    referenced are removed through ``config.store``.

    Returns the set of storage IDs that were found to be orphaned. When no candidates are
    given, an empty list is returned instead and nothing else happens.
    """
    if len(storage_id_whitelist) == 0:
        return []

    def _removable_paths(placements_list):
        """
        Build the set of ``(location name, path, content checksum)`` tuples to remove for
        the given placements, leaving out CAS paths whose checksum is still referenced by
        an ``ImageStorage`` or ``ApprBlob`` row.
        """
        if not placements_list:
            return set()

        with ensure_under_transaction():
            # Checksums of the CAS-stored placements; only those that nothing else
            # references may be removed.
            cas_checksums = {
                placement.storage.content_checksum
                for placement in placements_list
                if placement.storage.cas_path
            }

            removable_checksums = set()
            if cas_checksums:
                # Checksums still present in the image storage table.
                storage_query = ImageStorage.select(
                    ImageStorage.content_checksum
                ).where(ImageStorage.content_checksum << list(cas_checksums))
                storage_referenced = {
                    row.content_checksum for row in storage_query
                }
                if storage_referenced:
                    logger.warning(
                        "GC attempted to remove CAS checksums %s, which are still IS referenced",
                        storage_referenced,
                    )

                # Checksums still present in the ApprBlob table.
                appr_query = ApprBlob.select(ApprBlob.digest).where(
                    ApprBlob.digest << list(cas_checksums))
                appr_referenced = {row.digest for row in appr_query}
                if appr_referenced:
                    logger.warning(
                        "GC attempted to remove CAS checksums %s, which are ApprBlob referenced",
                        appr_referenced,
                    )

                removable_checksums = (
                    cas_checksums - appr_referenced - storage_referenced)

            # Keep every placement that is not at a CAS path, plus the CAS ones whose
            # checksum turned out to be unreferenced.
            removable = set()
            for placement in placements_list:
                if (placement.storage.cas_path
                        and placement.storage.content_checksum
                        not in removable_checksums):
                    continue

                removable.add((
                    get_image_location_for_id(placement.location_id).name,
                    get_layer_path(placement.storage),
                    placement.storage.content_checksum,
                ))
            return removable

    # Note: the placement and storage deletes must happen in the same transaction
    # (unfortunately), because a storage without any placement is invalid and a placement
    # cannot exist without a storage.
    # TODO: We might want to allow for null storages on placements, which would allow us to
    # delete the storages, then delete the placements in a non-transaction.
    logger.debug("Garbage collecting storages from candidates: %s",
                 storage_id_whitelist)
    paths_to_remove = []
    orphaned_storage_ids = set()
    for candidate_id in storage_id_whitelist:
        logger.debug("Garbage collecting storage %s", candidate_id)

        with db_transaction():
            if not _is_storage_orphaned(candidate_id):
                continue

            orphaned_storage_ids.add(candidate_id)

            placement_query = ImageStoragePlacement.select(
                ImageStoragePlacement, ImageStorage
            ).join(ImageStorage).where(ImageStorage.id == candidate_id)
            placements = list(placement_query)

            # Placement rows are only deleted when any were found.
            placement_rows_deleted = 0
            if placements:
                placement_rows_deleted = ImageStoragePlacement.delete().where(
                    ImageStoragePlacement.storage == candidate_id).execute()

            # Then the rows that hang off the storage, and the storage row itself.
            torrent_rows_deleted = TorrentInfo.delete().where(
                TorrentInfo.storage == candidate_id).execute()

            signature_rows_deleted = ImageStorageSignature.delete().where(
                ImageStorageSignature.storage == candidate_id).execute()

            storage_rows_deleted = ImageStorage.delete().where(
                ImageStorage.id == candidate_id).execute()

            # Work out the paths to remove. Not every path of the storage can simply be
            # removed, as CAS storages can share a path; the helper filters out paths whose
            # content checksum is still held by a row in the database.
            paths_to_remove.extend(_removable_paths(placements))

        per_table_counts = (
            ("TorrentInfo", torrent_rows_deleted),
            ("ImageStorageSignature", signature_rows_deleted),
            ("ImageStorage", storage_rows_deleted),
            ("ImageStoragePlacement", placement_rows_deleted),
        )
        for table_name, row_count in per_table_counts:
            gc_table_rows_deleted.labels(table=table_name).inc(row_count)

    # Blobs are deliberately removed from storage outside of any transaction. This may
    # leave garbage behind in s3, trading that off for higher availability in the database.
    for location_name, image_path, storage_checksum in set(paths_to_remove):
        # Skip the specialized blob digests we know we should keep around, and perform one
        # final check that no storage row still needs the blob.
        if storage_checksum and (
                storage_checksum in SPECIAL_BLOB_DIGESTS
                or ImageStorage.select().where(
                    ImageStorage.content_checksum == storage_checksum).exists()):
            continue

        logger.debug("Removing %s from %s", image_path, location_name)
        config.store.remove({location_name}, image_path)
        gc_storage_blobs_deleted.inc()

    return orphaned_storage_ids