def _is_storage_orphaned(candidate_id):
    """
    Returns whether the given candidate storage ID is orphaned, i.e. whether no ManifestBlob,
    Image or UploadedBlob row points at it.

    Must be executed under a transaction.
    """
    # (model, foreign key field) pairs, probed in this order; the first hit wins.
    referencing_tables = (
        (ManifestBlob, "blob"),
        (Image, "storage"),
        (UploadedBlob, "blob"),
    )

    with ensure_under_transaction():
        for model_cls, field_name in referencing_tables:
            try:
                model_cls.get(**{field_name: candidate_id})
            except model_cls.DoesNotExist:
                # Nothing in this table refers to the storage; try the next one.
                continue

            # A referencing row exists, so the storage is still in use.
            return False

        return True
def test_purge_repository_storage_blob(default_tag_policy, initialized_db):
    """
    Checks that purging every repository removes from the storage engine the blobs of uploaded
    blobs that no *other* repository referenced (via ManifestBlob, Image or UploadedBlob) at the
    time their repository was purged.

    The arguments are not read in the body; presumably they are pytest fixtures requested for
    their setup side effects.
    """
    with populate_storage_for_gc():
        expected_blobs_removed_from_storage = set()
        preferred = storage.preferred_locations[0]

        # Check that every existing uploaded blob has an object in storage.
        for repo in database.Repository.select().order_by(database.Repository.id):
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                assert storage.exists(
                    {preferred}, storage.blob_path(uploadedblob.blob.content_checksum)
                )

        # Remove everything.
        for repo in database.Repository.select():
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                # Check if only this repository is referencing the uploaded blob's storage.
                # If so, the blob should be removed from storage.
                has_dependent_manifestblob = (
                    ManifestBlob.select()
                    .where(
                        ManifestBlob.blob == uploadedblob.blob,
                        ManifestBlob.repository != repo,
                    )
                    .count()
                )

                has_dependent_image = (
                    Image.select()
                    .where(
                        Image.storage == uploadedblob.blob,
                        Image.repository != repo,
                    )
                    .count()
                )

                # Compare on the blob column. Comparing the model class itself to a row
                # instance yields a plain Python bool instead of a query expression, so the
                # filter would never match an uploaded blob held by another repository.
                has_dependent_uploadedblobs = (
                    UploadedBlob.select()
                    .where(
                        UploadedBlob.blob == uploadedblob.blob,
                        UploadedBlob.repository != repo,
                    )
                    .count()
                )

                if (
                    not has_dependent_manifestblob
                    and not has_dependent_image
                    and not has_dependent_uploadedblobs
                ):
                    expected_blobs_removed_from_storage.add(uploadedblob.blob)

            assert model.gc.purge_repository(repo, force=True)

        # Every blob that had no other referencing repository must be gone from storage.
        for removed_blob_from_storage in expected_blobs_removed_from_storage:
            assert not storage.exists(
                {preferred}, storage.blob_path(removed_blob_from_storage.content_checksum)
            )
def _get_dangling_storage_count():
    """
    Returns the number of ImageStorage rows whose ID is not referenced by any Image,
    ManifestBlob, UploadedBlob or DerivedStorageForImage row.
    """
    all_storage_ids = {row.id for row in ImageStorage.select()}

    # Collect the storage IDs referenced from each table that can point at a storage row.
    image_refs = {row.storage_id for row in Image.select()}
    manifest_refs = {row.blob_id for row in ManifestBlob.select()}
    uploaded_refs = {row.blob_id for row in UploadedBlob.select()}
    derived_refs = {row.derivative_id for row in DerivedStorageForImage.select()}

    dangling = all_storage_ids.difference(
        image_refs, derived_refs, manifest_refs, uploaded_refs
    )
    return len(dangling)
def _temp_link_blob(repository_id, storage, link_expiration_s):
    """
    Creates an UploadedBlob row linking the given storage to the repository, expiring
    `link_expiration_s` seconds from now. Returns None (and creates nothing) if the repository
    does not exist or is marked for deletion.

    Note: Should *always* be called by a parent under a transaction.
    """
    try:
        target_repo = Repository.get(id=repository_id)
    except Repository.DoesNotExist:
        target_repo = None

    # Never link blobs into a missing repository or one that is being deleted.
    if target_repo is None or target_repo.state == RepositoryState.MARKED_FOR_DELETION:
        return None

    expiration = datetime.utcnow() + timedelta(seconds=link_expiration_s)
    return UploadedBlob.create(
        repository=repository_id,
        blob=storage,
        expires_at=expiration,
    )
def test_garbage_collect_storage(default_tag_policy, initialized_db):
    """
    Runs storage GC over a random, non-empty sample of the storages behind existing uploaded
    blobs and checks that their content is still present in the storage engine afterwards.
    """
    with populate_storage_for_gc():
        preferred = storage.preferred_locations[0]

        # Pick a random sample (at least one, at most all) of the uploaded blobs.
        all_uploaded = list(UploadedBlob.select())
        sample_size = random.randrange(1, len(all_uploaded) + 1)
        sampled = random.sample(all_uploaded, sample_size)

        candidate_storage_ids = [entry.blob.id for entry in sampled]
        model.storage.garbage_collect_storage(candidate_storage_ids)

        # Ensure that the blobs' storage weren't removed, since we didn't GC anything.
        for entry in sampled:
            blob_location = storage.blob_path(entry.blob.content_checksum)
            assert storage.exists({preferred}, blob_location)
def lookup_expired_uploaded_blobs(repository):
    """
    Returns a query for the uploaded blobs in the repository whose expiration time is at or
    before the current UTC time.
    """
    in_repository = UploadedBlob.repository == repository
    already_expired = UploadedBlob.expires_at <= datetime.utcnow()
    return UploadedBlob.select().where(in_repository, already_expired)
def _delete_temp_links(repo):
    """
    Deletes every UploadedBlob row (temporary blob link) that belongs to the repository.
    """
    delete_query = UploadedBlob.delete().where(UploadedBlob.repository == repo)
    delete_query.execute()
def assert_gc_integrity(expect_storage_removed=True): """ Specialized assertion for ensuring that GC cleans up all dangling storages and labels, invokes the callback for images removed and doesn't invoke the callback for images *not* removed. """ # Add a callback for when images are removed. removed_image_storages = [] remove_callback = model.config.register_image_cleanup_callback( removed_image_storages.extend) # Store existing storages. We won't verify these for existence because they # were likely created as test data. existing_digests = set() for storage_row in ImageStorage.select(): if storage_row.cas_path: existing_digests.add(storage_row.content_checksum) for blob_row in ApprBlob.select(): existing_digests.add(blob_row.digest) # Store the number of dangling objects. existing_storage_count = _get_dangling_storage_count() existing_label_count = _get_dangling_label_count() existing_manifest_count = _get_dangling_manifest_count() # Yield to the GC test. with check_transitive_modifications(): try: yield finally: remove_callback() # Ensure the number of dangling storages, manifests and labels has not changed. updated_storage_count = _get_dangling_storage_count() assert updated_storage_count == existing_storage_count updated_label_count = _get_dangling_label_count() assert updated_label_count == existing_label_count, _get_dangling_labels() updated_manifest_count = _get_dangling_manifest_count() assert updated_manifest_count == existing_manifest_count # Ensure that for each call to the image+storage cleanup callback, the image and its # storage is not found *anywhere* in the database. for removed_image_and_storage in removed_image_storages: assert isinstance(removed_image_and_storage, Image) try: # NOTE: SQLite can and will reuse AUTOINCREMENT IDs occasionally, so if we find a row # with the same ID, make sure it does not have the same Docker Image ID. 
# See: https://www.sqlite.org/autoinc.html found_image = Image.get(id=removed_image_and_storage.id) assert (found_image.docker_image_id != removed_image_and_storage.docker_image_id ), "Found unexpected removed image %s under repo %s" % ( found_image.id, found_image.repository, ) except Image.DoesNotExist: pass # Ensure that image storages are only removed if not shared. shared = Image.select().where( Image.storage == removed_image_and_storage.storage_id).count() if shared == 0: shared = (ManifestBlob.select().where( ManifestBlob.blob == removed_image_and_storage.storage_id).count()) if shared == 0: shared = (UploadedBlob.select().where( UploadedBlob.blob == removed_image_and_storage.storage_id).count()) if shared == 0: with pytest.raises(ImageStorage.DoesNotExist): ImageStorage.get(id=removed_image_and_storage.storage_id) with pytest.raises(ImageStorage.DoesNotExist): ImageStorage.get(uuid=removed_image_and_storage.storage.uuid) # Ensure all CAS storage is in the storage engine. preferred = storage.preferred_locations[0] for storage_row in ImageStorage.select(): if storage_row.content_checksum in existing_digests: continue if storage_row.cas_path: storage.get_content({preferred}, storage.blob_path( storage_row.content_checksum)) for blob_row in ApprBlob.select(): if blob_row.digest in existing_digests: continue storage.get_content({preferred}, storage.blob_path(blob_row.digest)) # Ensure all tags have valid manifests. for manifest in {t.manifest for t in Tag.select()}: # Ensure that the manifest's blobs all exist. found_blobs = { b.blob.content_checksum for b in ManifestBlob.select().where( ManifestBlob.manifest == manifest) } parsed = parse_manifest_from_bytes( Bytes.for_string_or_unicode(manifest.manifest_bytes), manifest.media_type.name) assert set(parsed.local_blob_digests) == found_blobs
def purge_repository(repo, force=False):
    """
    Completely delete all traces of the repository.

    The repository must already be marked for deletion unless `force` is set. Returns True upon
    complete success, and False upon partial or total failure (the repository row has already
    disappeared, or deleting it hit an IntegrityError). Garbage collection is incremental and
    repeatable, so this return value does not need to be checked or responded to.
    """
    assert repo.state == RepositoryState.MARKED_FOR_DELETION or force

    # Update the repo state to ensure nothing else is written to it.
    repo.state = RepositoryState.MARKED_FOR_DELETION
    repo.save()

    if repo.kind.name != "application":
        # GC to remove the images and storage.
        _purge_repository_contents(repo)
    else:
        # Delete the repository of all Appr-referenced entries.
        # Note that new-model Tag's must be deleted in *two* passes, as they can reference
        # parent tags, and MySQL is... particular... about such relationships when deleting.
        linked_deleted = (
            ApprTag.delete()
            .where(ApprTag.repository == repo, ~(ApprTag.linked_tag >> None))
            .execute()
        )
        remaining_deleted = ApprTag.delete().where(ApprTag.repository == repo).execute()
        gc_table_rows_deleted.labels(table="ApprTag").inc(linked_deleted + remaining_deleted)

    # Ensure there are no additional tags, manifests, images or blobs in the repository.
    must_be_empty = (
        ApprTag,
        Tag,
        RepositoryTag,
        Manifest,
        ManifestBlob,
        UploadedBlob,
        ManifestSecurityStatus,
        Image,
    )
    for model_cls in must_be_empty:
        assert model_cls.select().where(model_cls.repository == repo).count() == 0

    # Delete any repository build triggers, builds, and any other large-ish reference tables
    # for the repository. The order of this tuple is the deletion order.
    chunk_deleted_models = (
        RepositoryPermission,
        RepositoryBuild,
        RepositoryBuildTrigger,
        RepositoryActionCount,
        Star,
        AccessToken,
        RepositoryNotification,
        BlobUpload,
        RepoMirrorConfig,
        RepositoryAuthorizedEmail,
    )
    for model_cls in chunk_deleted_models:
        _chunk_delete_all(repo, model_cls, force=force)

    # Delete any marker rows for the repository.
    DeletedRepository.delete().where(DeletedRepository.repository == repo).execute()

    # Delete the rest of the repository metadata, first making sure the repository still
    # exists.
    try:
        fetched = Repository.get(id=repo.id)
    except Repository.DoesNotExist:
        return False

    try:
        fetched.delete_instance(recursive=True, delete_nullable=False, force=force)
        gc_repos_purged.inc()
    except IntegrityError:
        return False

    return True
def _purge_repository_contents(repo):
    """
    Purges all the contents of a repository, removing all of its tags, manifests and images.

    Works in four phases, each repeated until nothing is left: Tag rows, UploadedBlob rows,
    RepositoryTag rows, and finally any remaining Image rows. Raises a plain Exception if a pass
    over the remaining images fails to reduce their count.
    """
    logger.debug("Purging repository %s", repo)

    # Purge via all the tags. The outer loop repeats until a full pass yields no chunk at all.
    while True:
        found = False
        for tags in _chunk_iterate_for_deletion(
                Tag.select().where(Tag.repository == repo)):
            logger.debug("Found %s tags to GC under repository %s", len(tags),
                         repo)
            found = True
            # A fresh GC context is built per chunk and run once the chunk's tags are purged.
            context = _GarbageCollectorContext(repo)
            for tag in tags:
                logger.debug("Deleting tag %s under repository %s", tag, repo)
                assert tag.repository_id == repo.id
                _purge_oci_tag(tag, context, allow_non_expired=True)

            _run_garbage_collection(context)

        if not found:
            break

    # Purge all uploaded blobs in the repository. The query is not filtered on expiration and
    # allow_non_expired=True is passed, so this is not limited to expired blobs.
    while True:
        found = False
        for uploaded_blobs in _chunk_iterate_for_deletion(
                UploadedBlob.select().where(UploadedBlob.repository == repo)):
            logger.debug("Found %s uploaded blobs to GC under repository %s",
                         len(uploaded_blobs), repo)
            found = True
            context = _GarbageCollectorContext(repo)
            for uploaded_blob in uploaded_blobs:
                logger.debug("Deleting uploaded blob %s under repository %s",
                             uploaded_blob, repo)
                assert uploaded_blob.repository_id == repo.id
                _purge_uploaded_blob(uploaded_blob, context, allow_non_expired=True)

            _run_garbage_collection(context)

        if not found:
            break

    # Purge the pre-OCI (RepositoryTag) tags in the same chunked fashion.
    # TODO: remove this once we've removed the foreign key constraints from RepositoryTag
    # and Image.
    while True:
        found = False
        repo_tag_query = RepositoryTag.select().where(
            RepositoryTag.repository == repo)
        for tags in _chunk_iterate_for_deletion(repo_tag_query):
            logger.debug("Found %s tags to GC under repository %s", len(tags),
                         repo)
            found = True
            context = _GarbageCollectorContext(repo)

            for tag in tags:
                logger.debug("Deleting tag %s under repository %s", tag, repo)
                assert tag.repository_id == repo.id
                _purge_pre_oci_tag(tag, context, allow_non_expired=True)

            _run_garbage_collection(context)

        if not found:
            break

    # At this point nothing tag-, manifest- or upload-related may remain in the repository.
    assert Tag.select().where(Tag.repository == repo).count() == 0
    assert RepositoryTag.select().where(
        RepositoryTag.repository == repo).count() == 0
    assert Manifest.select().where(Manifest.repository == repo).count() == 0
    assert ManifestBlob.select().where(
        ManifestBlob.repository == repo).count() == 0
    assert UploadedBlob.select().where(
        UploadedBlob.repository == repo).count() == 0

    # Add all remaining images to a new context. We do this here to minimize the number of images
    # we need to load.
    while True:
        # NOTE(review): `found_image` is assigned here and in the loop below but never read
        # within this block.
        found_image = False
        image_context = _GarbageCollectorContext(repo)

        existing_count = Image.select().where(Image.repository == repo).count()
        if not existing_count:
            break

        for image in Image.select().where(Image.repository == repo):
            found_image = True
            logger.debug("Trying to delete image %s under repository %s", image,
                         repo)
            assert image.repository_id == repo.id
            image_context.add_legacy_image_id(image.id)

        _run_garbage_collection(image_context)
        new_count = Image.select().where(Image.repository == repo).count()
        # Each pass must strictly reduce the image count; otherwise this loop would never
        # terminate, so fail loudly instead.
        if new_count >= existing_count:
            raise Exception("GC purge bug! Please report this to support!")