def test_manifestbackfillworker_mislinked_manifest(clear_rows, initialized_db):
    """
    Tests that a manifest whose image is mislinked will have its storages relinked properly.

    The manifest's single layer references the content checksum of the v3.0 image's
    storage, while the TagManifest row is attached to the v5.0 tag. After backfilling,
    the legacy image must be the v5.0 image and the only linked blob must be the v3.0
    storage.
    """
    # Delete existing tag manifest so we can reuse the tag.
    TagManifestLabel.delete().execute()
    TagManifest.delete().execute()

    tag_v30 = model.tag.get_active_tag('devtable', 'gargantuan', 'v3.0')
    tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0')

    # Add a mislinked manifest, by having its layer point to a blob in v3.0 but its image
    # be the v5.0 image.
    builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag')
    builder.add_layer(tag_v30.image.storage.content_checksum, '{"id": "foo"}')
    manifest = builder.build(docker_v2_signing_key)

    mislinked_manifest = TagManifest.create(
        json_data=manifest.bytes.as_encoded_str(),
        digest=manifest.digest,
        tag=tag_v50,
    )

    # Backfill the manifest and ensure its proper content checksum was linked.
    assert _backfill_manifest(mislinked_manifest)

    map_row = TagManifestToManifest.get(tag_manifest=mislinked_manifest)
    assert not map_row.broken

    manifest_row = map_row.manifest
    legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
    assert legacy_image == tag_v50.image

    manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row))
    assert len(manifest_blobs) == 1
    assert manifest_blobs[0].blob.content_checksum == tag_v30.image.storage.content_checksum
def test_store_tag_manifest(get_storages, initialized_db):
    """
    Stores a schema 1 tag manifest built from the fixture storages and checks that the
    new-model rows (mapping row, manifest, blobs and legacy image) were written for it.
    """
    # Create a manifest with some layers.
    builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'sometag')

    storages = get_storages()
    assert storages

    repo = model.repository.get_repository('devtable', 'simple')

    # One layer (and one legacy image) per storage; remember checksum -> storage id.
    storage_id_map = {}
    for position, layer_storage in enumerate(storages):
        legacy_id = 'someimage%s' % position
        layer_metadata = json.dumps({'id': legacy_id})
        builder.add_layer(layer_storage.content_checksum, layer_metadata)
        find_create_or_link_image(legacy_id, repo, 'devtable', {}, 'local_us')
        storage_id_map[layer_storage.content_checksum] = layer_storage.id

    manifest = builder.build(docker_v2_signing_key)
    tag_manifest, _ = store_tag_manifest_for_testing(
        'devtable',
        'simple',
        'sometag',
        manifest,
        manifest.leaf_layer_v1_image_id,
        storage_id_map,
    )

    # Ensure we have the new-model expected rows.
    mapping_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
    stored_manifest = mapping_row.manifest

    assert stored_manifest is not None
    assert stored_manifest.manifest_bytes == manifest.bytes.as_encoded_str()
    assert stored_manifest.digest == str(manifest.digest)

    linked_blob_query = ManifestBlob.select().where(ManifestBlob.manifest == stored_manifest)
    blob_rows = {linked.blob_id for linked in linked_blob_query}
    expected_blob_ids = {s.id for s in storages}
    assert blob_rows == expected_blob_ids

    legacy_row = ManifestLegacyImage.get(manifest=stored_manifest)
    assert legacy_row.image == tag_manifest.tag.image
def test_manifestbackfillworker_mislinked_invalid_manifest(clear_rows, initialized_db):
    """
    Tests that a manifest whose image is mislinked will attempt to have its storages
    relinked properly.

    The manifest's single layer references a digest ("sha256:deadbeef") for which the
    test creates no blob, so the backfill is expected to succeed while marking the
    mapping row as broken and linking no blobs.
    """
    # Delete existing tag manifest so we can reuse the tag.
    TagManifestLabel.delete().execute()
    TagManifest.delete().execute()

    tag_v50 = model.tag.get_active_tag("devtable", "gargantuan", "v5.0")

    # Add a mislinked manifest, by having its layer point to an invalid blob but its image
    # be the v5.0 image.
    builder = DockerSchema1ManifestBuilder("devtable", "gargantuan", "sometag")
    builder.add_layer("sha256:deadbeef", '{"id": "foo"}')
    manifest = builder.build(docker_v2_signing_key)

    broken_manifest = TagManifest.create(
        json_data=manifest.bytes.as_encoded_str(), digest=manifest.digest, tag=tag_v50
    )

    # Backfill the manifest and ensure it is marked as broken.
    assert _backfill_manifest(broken_manifest)

    map_row = TagManifestToManifest.get(tag_manifest=broken_manifest)
    assert map_row.broken

    manifest_row = map_row.manifest
    legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
    assert legacy_image == tag_v50.image

    manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row))
    assert len(manifest_blobs) == 0
def test_create_placeholder_blobs_for_new_manifest(self, create_repo):
    """
    Creating a manifest through the proxy model must link one ManifestBlob per layer
    plus one for the config blob, with digests matching the input manifest.
    """
    repo_ref = create_repo(self.orgname, self.upstream_repository, self.user)

    manifest_bytes = Bytes.for_string_or_unicode(UBI8_8_4_MANIFEST_SCHEMA2)
    input_manifest = parse_manifest_from_bytes(
        manifest_bytes, DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE
    )

    proxy_model = ProxyModel(self.orgname, self.upstream_repository, self.user)
    manifest, _ = proxy_model._create_manifest_and_retarget_tag(
        repo_ref, input_manifest, self.tag
    )
    assert manifest is not None

    layers = input_manifest.manifest_dict["layers"]

    # schema 2 manifests have one extra config blob
    blob_count = 1 + len(layers)

    mblobs = ManifestBlob.select().where(ManifestBlob.manifest == manifest.id)
    assert blob_count == mblobs.count()

    expected_digests = [layer["digest"] for layer in layers]
    expected_digests.append(input_manifest.config.digest)

    created_digests = []
    for mblob in mblobs:
        created_digests.append(mblob.blob.content_checksum)

    assert sorted(expected_digests) == sorted(created_digests)
def backfill_replication():
    """
    Walk every manifest blob and enqueue for replication those whose storage is not
    yet placed in all locations required for the owning namespace.

    Required locations are the namespace's region locations combined with the
    storage engine's default locations. A blob uuid that has been enqueued once is
    skipped on later rows.
    """
    encountered = set()

    manifest_blobs = (
        ManifestBlob.select(ManifestBlob, Repository, User)
        .join(ImageStorage)
        .switch(ManifestBlob)
        .join(Repository)
        .join(User)
    )

    for manifest in manifest_blobs:
        blob = manifest.blob
        if blob.uuid in encountered:
            continue

        namespace = manifest.repository.namespace_user
        region_locations = model.user.get_region_locations(namespace)
        locations_required = region_locations | set(storage.default_locations)

        placements = (
            ImageStoragePlacement.select(ImageStoragePlacement, ImageStorageLocation)
            .where(ImageStoragePlacement.storage == blob)
            .join(ImageStorageLocation)
        )
        existing_locations = {placement.location.name for placement in placements}

        # Nothing to do when every required location already holds the blob.
        if not (locations_required - existing_locations):
            continue

        print("Enqueueing manifest blob %s to be replicated" % (blob.uuid))
        encountered.add(blob.uuid)

        # Skip the enqueue when the replication queue reports this uuid as alive.
        if image_replication_queue.alive([blob.uuid]):
            continue

        queue_storage_replication(namespace.username, blob)
def _get_dangling_storage_count():
    """
    Return the number of ImageStorage rows whose id is referenced by no Image,
    no ManifestBlob and no DerivedStorageForImage row.
    """
    storage_ids = {current.id for current in ImageStorage.select()}

    # Collect every storage id that some other table still points at.
    referenced = set()
    referenced.update(image.storage_id for image in Image.select())
    referenced.update(blob.blob_id for blob in ManifestBlob.select())
    referenced.update(derived.derivative_id for derived in DerivedStorageForImage.select())

    dangling = storage_ids.difference(referenced)
    return len(dangling)
def purge_repository(repo, force=False):
    """
    Completely delete all traces of the repository.

    Returns True upon complete success and False when the repository row can no
    longer be found at the final step. Garbage collection is incremental and
    repeatable, so this return value does not need to be checked or responded to.

    Unless `force` is set, the repository must already be in the
    MARKED_FOR_DELETION state.
    """
    assert repo.state == RepositoryState.MARKED_FOR_DELETION or force

    # Delete the repository of all Appr-referenced entries.
    # Note that new-model Tag's must be deleted in *two* passes, as they can reference parent tags,
    # and MySQL is... particular... about such relationships when deleting.
    if repo.kind.name == "application":
        linked_tags = ApprTag.delete().where(
            ApprTag.repository == repo, ~(ApprTag.linked_tag >> None)
        )
        linked_tags.execute()

        remaining_tags = ApprTag.delete().where(ApprTag.repository == repo)
        remaining_tags.execute()
    else:
        # GC to remove the images and storage.
        _purge_repository_contents(repo)

    # Ensure there are no additional tags, manifests, images or blobs in the repository.
    must_be_empty = (ApprTag, Tag, RepositoryTag, Manifest, ManifestBlob, Image)
    for table in must_be_empty:
        assert table.select().where(table.repository == repo).count() == 0

    # Delete any repository build triggers, builds, and any other large-ish reference tables for
    # the repository.
    chunk_deleted_tables = (
        RepositoryPermission,
        RepositoryBuild,
        RepositoryBuildTrigger,
        RepositoryActionCount,
        Star,
        AccessToken,
        RepositoryNotification,
        BlobUpload,
        RepoMirrorConfig,
        RepositoryAuthorizedEmail,
    )
    for table in chunk_deleted_tables:
        _chunk_delete_all(repo, table, force=force)

    # Delete any marker rows for the repository.
    DeletedRepository.delete().where(DeletedRepository.repository == repo).execute()

    # Delete the rest of the repository metadata.
    try:
        # Make sure the repository still exists.
        fetched = Repository.get(id=repo.id)
    except Repository.DoesNotExist:
        return False

    fetched.delete_instance(recursive=True, delete_nullable=False, force=force)
    return True
def test_purge_repository_storage_blob(default_tag_policy, initialized_db):
    """
    Purges every repository and verifies that blobs referenced only by the purged
    repository are removed from the storage engine.

    For each uploaded blob of a repository, the blob is expected to disappear from
    storage when no ManifestBlob, Image or UploadedBlob row of a *different*
    repository references the same blob at the time of the purge.
    """
    with populate_storage_for_gc():
        expected_blobs_removed_from_storage = set()
        preferred = storage.preferred_locations[0]

        # Check that existing uploadedblobs has an object in storage
        for repo in database.Repository.select().order_by(database.Repository.id):
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                assert storage.exists(
                    {preferred}, storage.blob_path(uploadedblob.blob.content_checksum)
                )

        # Remove everything
        for repo in database.Repository.select():
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                # Check if only this repository is referencing the uploadedblob
                # If so, the blob should be removed from storage
                has_dependent_manifestblob = (
                    ManifestBlob.select()
                    .where(
                        ManifestBlob.blob == uploadedblob.blob,
                        ManifestBlob.repository != repo,
                    )
                    .count()
                )
                has_dependent_image = (
                    Image.select()
                    .where(
                        Image.storage == uploadedblob.blob,
                        Image.repository != repo,
                    )
                    .count()
                )
                # Compare on the blob column: comparing the model class itself to a row
                # instance is a plain Python comparison, not a column expression, so it
                # could never match another repository's uploaded blob.
                has_dependent_uploadedblobs = (
                    UploadedBlob.select()
                    .where(
                        UploadedBlob.blob == uploadedblob.blob,
                        UploadedBlob.repository != repo,
                    )
                    .count()
                )

                if (
                    not has_dependent_manifestblob
                    and not has_dependent_image
                    and not has_dependent_uploadedblobs
                ):
                    expected_blobs_removed_from_storage.add(uploadedblob.blob)

            assert model.gc.purge_repository(repo, force=True)

        for removed_blob_from_storage in expected_blobs_removed_from_storage:
            assert not storage.exists(
                {preferred}, storage.blob_path(removed_blob_from_storage.content_checksum)
            )
def purge_repository(namespace_name, repository_name):
    """
    Completely delete all traces of the repository.

    Returns True upon complete success and False when the repository cannot be found,
    either up front or at the final deletion step. Garbage collection is incremental
    and repeatable, so this return value does not need to be checked or responded to.
    """
    try:
        repo = _basequery.get_existing_repository(namespace_name, repository_name)
    except Repository.DoesNotExist:
        return False

    assert repo.name == repository_name

    # Delete the repository of all Appr-referenced entries.
    # Note that new-model Tag's must be deleted in *two* passes, as they can reference parent tags,
    # and MySQL is... particular... about such relationships when deleting.
    if repo.kind.name == 'application':
        linked_tags = ApprTag.delete().where(
            ApprTag.repository == repo, ~(ApprTag.linked_tag >> None)
        )
        linked_tags.execute()

        remaining_tags = ApprTag.delete().where(ApprTag.repository == repo)
        remaining_tags.execute()
    else:
        # GC to remove the images and storage.
        _purge_repository_contents(repo)

    # Ensure there are no additional tags, manifests, images or blobs in the repository.
    for table in (ApprTag, Tag, RepositoryTag, Manifest, ManifestBlob, Image):
        assert table.select().where(table.repository == repo).count() == 0

    # Delete the rest of the repository metadata.
    try:
        # Make sure the repository still exists.
        fetched = _basequery.get_existing_repository(namespace_name, repository_name)
    except Repository.DoesNotExist:
        return False

    fetched.delete_instance(recursive=True, delete_nullable=False)

    # Run callbacks
    for cleanup_callback in config.repo_cleanup_callbacks:
        cleanup_callback(namespace_name, repository_name)

    return True
def _garbage_collect_manifest(manifest_id, context):
    """
    Delete the manifest with the given id, along with its dependent rows, if it is
    not in use.

    The manifest's blobs, legacy image, child manifests and labels are first added to
    `context` for later collection. Returns False if the manifest is in use or no
    longer exists, True once it has been deleted. Per-table deletion counts are
    reported through `gc_table_rows_deleted`.
    """
    assert manifest_id is not None

    # Make sure the manifest isn't referenced.
    if _check_manifest_used(manifest_id):
        return False

    # Add the manifest's blobs to the context to be GCed.
    blob_rows = ManifestBlob.select().where(ManifestBlob.manifest == manifest_id)
    for blob_row in blob_rows:
        context.add_blob_id(blob_row.blob_id)

    # Retrieve the manifest's associated image, if any.
    try:
        legacy_image_id = ManifestLegacyImage.get(manifest=manifest_id).image_id
        context.add_legacy_image_id(legacy_image_id)
    except ManifestLegacyImage.DoesNotExist:
        legacy_image_id = None

    # Add child manifests to be GCed.
    child_rows = ManifestChild.select().where(ManifestChild.manifest == manifest_id)
    for child_row in child_rows:
        context.add_manifest_id(child_row.child_manifest_id)

    # Add the labels to be GCed.
    label_rows = ManifestLabel.select().where(ManifestLabel.manifest == manifest_id)
    for label_row in label_rows:
        context.add_label_id(label_row.label_id)

    # Delete the manifest.
    with db_transaction():
        try:
            manifest = Manifest.select().where(Manifest.id == manifest_id).get()
        except Manifest.DoesNotExist:
            return False

        assert manifest.id == manifest_id
        assert manifest.repository_id == context.repository.id

        # Re-check inside the transaction before deleting anything.
        if _check_manifest_used(manifest_id):
            return False

        # (metric table label, rows deleted), in the order the deletes are executed.
        deleted_rows = []

        # Delete any label mappings.
        label_map_delete = TagManifestLabelMap.delete().where(
            TagManifestLabelMap.manifest == manifest_id
        )
        deleted_rows.append(("TagManifestLabelMap", label_map_delete.execute()))

        # Delete any mapping rows for the manifest.
        mapping_delete = TagManifestToManifest.delete().where(
            TagManifestToManifest.manifest == manifest_id
        )
        deleted_rows.append(("TagManifestToManifest", mapping_delete.execute()))

        # Delete any label rows.
        label_delete = ManifestLabel.delete().where(
            ManifestLabel.manifest == manifest_id,
            ManifestLabel.repository == context.repository,
        )
        deleted_rows.append(("ManifestLabel", label_delete.execute()))

        # Delete any child manifest rows.
        child_delete = ManifestChild.delete().where(
            ManifestChild.manifest == manifest_id,
            ManifestChild.repository == context.repository,
        )
        deleted_rows.append(("ManifestChild", child_delete.execute()))

        # Delete the manifest blobs for the manifest.
        blob_delete = ManifestBlob.delete().where(
            ManifestBlob.manifest == manifest_id,
            ManifestBlob.repository == context.repository,
        )
        deleted_rows.append(("ManifestBlob", blob_delete.execute()))

        # Delete the security status for the manifest
        security_delete = ManifestSecurityStatus.delete().where(
            ManifestSecurityStatus.manifest == manifest_id,
            ManifestSecurityStatus.repository == context.repository,
        )
        deleted_rows.append(("ManifestSecurityStatus", security_delete.execute()))

        # Delete the manifest legacy image row.
        deleted_manifest_legacy_image = 0
        if legacy_image_id:
            legacy_delete = ManifestLegacyImage.delete().where(
                ManifestLegacyImage.manifest == manifest_id,
                ManifestLegacyImage.repository == context.repository,
            )
            deleted_manifest_legacy_image = legacy_delete.execute()

        # Delete the manifest.
        manifest.delete_instance()

    context.mark_manifest_removed(manifest)

    for table_name, row_count in deleted_rows:
        gc_table_rows_deleted.labels(table=table_name).inc(row_count)

    # The legacy image metric is only touched when rows were actually deleted.
    if deleted_manifest_legacy_image:
        gc_table_rows_deleted.labels(table="ManifestLegacyImage").inc(
            deleted_manifest_legacy_image
        )

    gc_table_rows_deleted.labels(table="Manifest").inc()
    return True
def _purge_repository_contents(repo):
    """
    Purges all the contents of a repository, removing all of its tags, manifests and images.

    Works in four phases, each repeated until it finds nothing left to process:
      1. OCI tags (`Tag`) are purged in chunks, running GC after each chunk.
      2. Uploaded blobs (`UploadedBlob`) are purged the same way.
      3. Pre-OCI tags (`RepositoryTag`) are purged the same way.
      4. All remaining `Image` rows are handed to GC via a fresh context.

    Raises:
      AssertionError: if a row does not belong to `repo`, or if tags, manifests,
        manifest blobs or uploaded blobs remain after phases 1-3.
      Exception: if a pass of phase 4 fails to reduce the number of images, which
        would otherwise loop forever.
    """
    logger.debug("Purging repository %s", repo)

    # Purge via all the tags.
    while True:
        found = False
        for tags in _chunk_iterate_for_deletion(Tag.select().where(Tag.repository == repo)):
            logger.debug("Found %s tags to GC under repository %s", len(tags), repo)
            found = True
            context = _GarbageCollectorContext(repo)
            for tag in tags:
                logger.debug("Deleting tag %s under repository %s", tag, repo)
                assert tag.repository_id == repo.id
                _purge_oci_tag(tag, context, allow_non_expired=True)

            _run_garbage_collection(context)

        if not found:
            break

    # Purge any uploaded blobs that have expired.
    while True:
        found = False
        for uploaded_blobs in _chunk_iterate_for_deletion(
            UploadedBlob.select().where(UploadedBlob.repository == repo)
        ):
            logger.debug(
                "Found %s uploaded blobs to GC under repository %s", len(uploaded_blobs), repo
            )
            found = True
            context = _GarbageCollectorContext(repo)
            for uploaded_blob in uploaded_blobs:
                logger.debug("Deleting uploaded blob %s under repository %s", uploaded_blob, repo)
                assert uploaded_blob.repository_id == repo.id
                _purge_uploaded_blob(uploaded_blob, context, allow_non_expired=True)

            _run_garbage_collection(context)

        if not found:
            break

    # TODO: remove this once we've removed the foreign key constraints from RepositoryTag
    # and Image.
    while True:
        found = False
        repo_tag_query = RepositoryTag.select().where(RepositoryTag.repository == repo)
        for tags in _chunk_iterate_for_deletion(repo_tag_query):
            logger.debug("Found %s tags to GC under repository %s", len(tags), repo)
            found = True
            context = _GarbageCollectorContext(repo)
            for tag in tags:
                logger.debug("Deleting tag %s under repository %s", tag, repo)
                assert tag.repository_id == repo.id
                _purge_pre_oci_tag(tag, context, allow_non_expired=True)

            _run_garbage_collection(context)

        if not found:
            break

    assert Tag.select().where(Tag.repository == repo).count() == 0
    assert RepositoryTag.select().where(RepositoryTag.repository == repo).count() == 0
    assert Manifest.select().where(Manifest.repository == repo).count() == 0
    assert ManifestBlob.select().where(ManifestBlob.repository == repo).count() == 0
    assert UploadedBlob.select().where(UploadedBlob.repository == repo).count() == 0

    # Add all remaining images to a new context. We do this here to minimize the number of images
    # we need to load.
    while True:
        image_context = _GarbageCollectorContext(repo)

        existing_count = Image.select().where(Image.repository == repo).count()
        if not existing_count:
            break

        for image in Image.select().where(Image.repository == repo):
            logger.debug("Trying to delete image %s under repository %s", image, repo)
            assert image.repository_id == repo.id
            image_context.add_legacy_image_id(image.id)

        _run_garbage_collection(image_context)

        # Every pass must make progress; otherwise this loop would never terminate.
        new_count = Image.select().where(Image.repository == repo).count()
        if new_count >= existing_count:
            raise Exception("GC purge bug! Please report this to support!")
def test_tagbackfillworker(clear_all_rows, initialized_db):
    """
    Backfills every RepositoryTag into the new-style Tag/Manifest tables and checks the
    resulting rows, first tag by tag and then per repository.
    """
    # Remove the new-style rows so we can backfill.
    for table in (TagToRepositoryTag, Tag):
        table.delete().execute()

    if clear_all_rows:
        for table in (
            TagManifestLabelMap,
            ManifestLabel,
            ManifestBlob,
            ManifestLegacyImage,
            TagManifestToManifest,
            Manifest,
        ):
            table.delete().execute()

    found_dead_tag = False

    for repository_tag in list(RepositoryTag.select()):
        # Backfill the tag.
        assert backfill_tag(repository_tag)

        # Ensure if we try again, the backfill is skipped.
        assert not backfill_tag(repository_tag)

        # Ensure that we now have the expected tag rows.
        tag = TagToRepositoryTag.get(repository_tag=repository_tag).tag
        assert tag.name == repository_tag.name
        assert tag.repository == repository_tag.repository
        assert not tag.hidden
        assert tag.reversion == repository_tag.reversion

        # Lifetimes are stored in seconds on the old row and milliseconds on the new one.
        if repository_tag.lifetime_start_ts is not None:
            assert tag.lifetime_start_ms == (repository_tag.lifetime_start_ts * 1000)
        else:
            assert tag.lifetime_start_ms is None

        if repository_tag.lifetime_end_ts is not None:
            assert tag.lifetime_end_ms == (repository_tag.lifetime_end_ts * 1000)
            found_dead_tag = True
        else:
            assert tag.lifetime_end_ms is None

        assert tag.manifest

        # Ensure that we now have the expected manifest rows.
        try:
            tag_manifest = TagManifest.get(tag=repository_tag)
        except TagManifest.DoesNotExist:
            continue

        map_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
        assert not map_row.broken

        manifest_row = map_row.manifest
        assert manifest_row.manifest_bytes == tag_manifest.json_data
        assert manifest_row.digest == tag_manifest.digest
        assert manifest_row.repository == tag_manifest.tag.repository
        assert tag.manifest == map_row.manifest

        legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
        assert tag_manifest.tag.image == legacy_image

        # The manifest must link the tagged image's storage and those of all its ancestors.
        expected_storages = {tag_manifest.tag.image.storage.id}
        expected_storages.update(
            Image.get(id=parent_image_id).storage_id
            for parent_image_id in tag_manifest.tag.image.ancestor_id_list()
        )

        linked_blobs = ManifestBlob.select().where(ManifestBlob.manifest == manifest_row)
        found_storages = {manifest_blob.blob_id for manifest_blob in linked_blobs}
        assert expected_storages == found_storages

        # Ensure the labels were copied over.
        tmls = list(TagManifestLabel.select().where(TagManifestLabel.annotated == tag_manifest))
        expected_labels = {tml.label_id for tml in tmls}

        copied_labels = ManifestLabel.select().where(ManifestLabel.manifest == manifest_row)
        found_labels = {m.label_id for m in copied_labels}
        assert found_labels == expected_labels

    # Verify at the repository level.
    for repository in list(Repository.select()):
        # `== False` builds a column expression here; it is not a Python truth test.
        tags = RepositoryTag.select().where(
            RepositoryTag.repository == repository, RepositoryTag.hidden == False
        )
        oci_tags = Tag.select().where(Tag.repository == repository)

        assert len(tags) == len(oci_tags)
        assert {t.name for t in tags} == {t.name for t in oci_tags}

        for tag in tags:
            tag_manifest = TagManifest.get(tag=tag)
            ttr = TagToRepositoryTag.get(repository_tag=tag)
            manifest = ttr.tag.manifest

            assert tag_manifest.json_data == manifest.manifest_bytes
            assert tag_manifest.digest == manifest.digest
            assert tag.image == ManifestLegacyImage.get(manifest=manifest).image
            assert tag.lifetime_start_ts == (ttr.tag.lifetime_start_ms / 1000)

            if tag.lifetime_end_ts:
                assert tag.lifetime_end_ts == (ttr.tag.lifetime_end_ms / 1000)
            else:
                assert ttr.tag.lifetime_end_ms is None

    assert found_dead_tag
def _garbage_collect_manifest(manifest_id, context):
    """
    Delete the manifest with the given id, along with its dependent rows, if it is
    not in use.

    The manifest's blobs, legacy image, child manifests and labels are first added to
    `context` for later collection. Returns False if the manifest is in use or no
    longer exists, True once it has been deleted.
    """
    assert manifest_id is not None

    # Make sure the manifest isn't referenced.
    if _check_manifest_used(manifest_id):
        return False

    # Add the manifest's blobs to the context to be GCed.
    blob_rows = ManifestBlob.select().where(ManifestBlob.manifest == manifest_id)
    for blob_row in blob_rows:
        context.add_blob_id(blob_row.blob_id)

    # Retrieve the manifest's associated image, if any.
    try:
        legacy_image_id = ManifestLegacyImage.get(manifest=manifest_id).image_id
        context.add_legacy_image_id(legacy_image_id)
    except ManifestLegacyImage.DoesNotExist:
        legacy_image_id = None

    # Add child manifests to be GCed.
    child_rows = ManifestChild.select().where(ManifestChild.manifest == manifest_id)
    for child_row in child_rows:
        context.add_manifest_id(child_row.child_manifest_id)

    # Add the labels to be GCed.
    label_rows = ManifestLabel.select().where(ManifestLabel.manifest == manifest_id)
    for label_row in label_rows:
        context.add_label_id(label_row.label_id)

    # Delete the manifest.
    with db_transaction():
        try:
            manifest = Manifest.select().where(Manifest.id == manifest_id).get()
        except Manifest.DoesNotExist:
            return False

        assert manifest.id == manifest_id
        assert manifest.repository_id == context.repository.id

        # Re-check inside the transaction before deleting anything.
        if _check_manifest_used(manifest_id):
            return False

        in_repository = context.repository

        # Delete any label mappings.
        label_map_delete = TagManifestLabelMap.delete().where(
            TagManifestLabelMap.manifest == manifest_id
        )
        label_map_delete.execute()

        # Delete any mapping rows for the manifest.
        mapping_delete = TagManifestToManifest.delete().where(
            TagManifestToManifest.manifest == manifest_id
        )
        mapping_delete.execute()

        # Delete any label rows.
        label_delete = ManifestLabel.delete().where(
            ManifestLabel.manifest == manifest_id,
            ManifestLabel.repository == in_repository,
        )
        label_delete.execute()

        # Delete any child manifest rows.
        child_delete = ManifestChild.delete().where(
            ManifestChild.manifest == manifest_id,
            ManifestChild.repository == in_repository,
        )
        child_delete.execute()

        # Delete the manifest blobs for the manifest.
        blob_delete = ManifestBlob.delete().where(
            ManifestBlob.manifest == manifest_id,
            ManifestBlob.repository == in_repository,
        )
        blob_delete.execute()

        # Delete the manifest legacy image row.
        if legacy_image_id:
            legacy_delete = ManifestLegacyImage.delete().where(
                ManifestLegacyImage.manifest == manifest_id,
                ManifestLegacyImage.repository == in_repository,
            )
            legacy_delete.execute()

        # Delete the manifest.
        manifest.delete_instance()

    context.mark_manifest_removed(manifest)
    return True
def test_get_or_create_manifest(schema_version, initialized_db):
    """
    Creates a manifest (schema 1 or 2, per `schema_version`) via get_or_create_manifest
    and checks the stored row, its temporary tag, its linked blobs and its labels; a
    second call must return the same manifest without creating it again.
    """
    repository = create_repository("devtable", "newrepo", None)

    expected_labels = {"Foo": "Bar", "Baz": "Meh"}

    config_document = {
        "id": "somelegacyid",
        "config": {"Labels": expected_labels},
        "rootfs": {"type": "layers", "diff_ids": []},
        "history": [
            {
                "created": "2018-04-03T18:37:09.284840891Z",
                "created_by": "do something",
            },
        ],
    }
    layer_json = json.dumps(config_document)

    # Create a legacy image.
    find_create_or_link_image("somelegacyid", repository, "devtable", {}, "local_us")

    # Add a blob containing the config.
    _, config_digest = _populate_blob(layer_json)

    # Add a blob of random data.
    random_data = "hello world"
    _, random_digest = _populate_blob(random_data)

    # Build the manifest.
    if schema_version == 1:
        builder = DockerSchema1ManifestBuilder("devtable", "simple", "anothertag")
        builder.add_layer(random_digest, layer_json)
        sample_manifest_instance = builder.build(docker_v2_signing_key)
    elif schema_version == 2:
        config_size = len(layer_json.encode("utf-8"))
        layer_size = len(random_data.encode("utf-8"))

        builder = DockerSchema2ManifestBuilder()
        builder.set_config_digest(config_digest, config_size)
        builder.add_layer(random_digest, layer_size)
        sample_manifest_instance = builder.build()

    assert sample_manifest_instance.layers_compressed_size is not None

    # Create a new manifest.
    created_manifest = get_or_create_manifest(repository, sample_manifest_instance, storage)
    created = created_manifest.manifest

    assert created_manifest.newly_created
    assert created is not None
    assert created.media_type.name == sample_manifest_instance.media_type
    assert created.digest == sample_manifest_instance.digest
    assert created.manifest_bytes == sample_manifest_instance.bytes.as_encoded_str()
    assert created_manifest.labels_to_apply == expected_labels
    assert created.config_media_type == sample_manifest_instance.config_media_type
    assert created.layers_compressed_size == sample_manifest_instance.layers_compressed_size

    # Lookup the manifest and verify.
    found = lookup_manifest(repository, created.digest, allow_dead=True)
    assert found.digest == created.digest
    assert found.config_media_type == created.config_media_type
    assert found.layers_compressed_size == created.layers_compressed_size

    # Verify it has a temporary tag pointing to it.
    assert Tag.get(manifest=created, hidden=True).lifetime_end_ms

    # Verify the linked blobs.
    linked_blobs = ManifestBlob.select().where(ManifestBlob.manifest == created)
    blob_digests = [mb.blob.content_checksum for mb in linked_blobs]

    assert random_digest in blob_digests
    if schema_version == 2:
        assert config_digest in blob_digests

    # Retrieve it again and ensure it is the same manifest.
    created_manifest2 = get_or_create_manifest(repository, sample_manifest_instance, storage)
    created2 = created_manifest2.manifest

    assert not created_manifest2.newly_created
    assert created2 == created

    # Ensure it again has a temporary tag.
    assert Tag.get(manifest=created2, hidden=True).lifetime_end_ms

    # Ensure the labels were added.
    labels = list(list_manifest_labels(created))
    assert len(labels) == 2

    labels_dict = {}
    for label in labels:
        labels_dict[label.key] = label.value

    assert labels_dict == expected_labels
def assert_gc_integrity(expect_storage_removed=True, check_oci_tags=True):
    """
    Generator-style assertion wrapped around a GC run: everything before the `yield`
    records the pre-GC state, everything after verifies that GC left no new dangling
    storages, labels or manifests, that removed images (and unshared storages) are gone
    from the database, and that all remaining blobs are readable from storage.

    `check_oci_tags` additionally asserts that every OCI tag is referenced by a
    TagToRepositoryTag row. `expect_storage_removed` is not consulted by this body.
    """
    # Add a callback for when images are removed.
    removed_image_storages = []
    model.config.register_image_cleanup_callback(removed_image_storages.extend)

    # Store the number of dangling storages and labels.
    existing_storage_count = _get_dangling_storage_count()
    existing_label_count = _get_dangling_label_count()
    existing_manifest_count = _get_dangling_manifest_count()

    yield

    # Ensure the number of dangling storages, manifests and labels has not changed.
    assert _get_dangling_storage_count() == existing_storage_count

    updated_label_count = _get_dangling_label_count()
    assert updated_label_count == existing_label_count, _get_dangling_labels()

    assert _get_dangling_manifest_count() == existing_manifest_count

    # Ensure that for each call to the image+storage cleanup callback, the image and its
    # storage is not found *anywhere* in the database.
    for removed in removed_image_storages:
        with pytest.raises(Image.DoesNotExist):
            Image.get(id=removed.id)

        # Ensure that image storages are only removed if not shared.
        # The ManifestBlob count is only queried when no Image shares the storage.
        shared = (
            Image.select().where(Image.storage == removed.storage_id).count()
            or ManifestBlob.select().where(ManifestBlob.blob == removed.storage_id).count()
        )
        if shared:
            continue

        with pytest.raises(ImageStorage.DoesNotExist):
            ImageStorage.get(id=removed.storage_id)

        with pytest.raises(ImageStorage.DoesNotExist):
            ImageStorage.get(uuid=removed.storage.uuid)

    # Ensure all CAS storage is in the storage engine.
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        if not storage_row.cas_path:
            continue
        storage.get_content({preferred}, storage.blob_path(storage_row.content_checksum))

    for blob_row in ApprBlob.select():
        storage.get_content({preferred}, storage.blob_path(blob_row.digest))

    # Ensure there are no danglings OCI tags.
    if check_oci_tags:
        oci_tags = {t.id for t in Tag.select()}
        referenced_oci_tags = {t.tag_id for t in TagToRepositoryTag.select()}
        assert not oci_tags - referenced_oci_tags

    # Ensure all tags have valid manifests.
    for manifest in {t.manifest for t in Tag.select()}:
        # Ensure that the manifest's blobs all exist.
        linked_blobs = ManifestBlob.select().where(ManifestBlob.manifest == manifest)
        found_blobs = {b.blob.content_checksum for b in linked_blobs}

        parsed = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(manifest.manifest_bytes), manifest.media_type.name
        )
        assert set(parsed.local_blob_digests) == found_blobs
def assert_gc_integrity(expect_storage_removed=True):
    """
    Generator that brackets a GC run with database and blob-storage consistency checks.

    Before the ``yield`` it registers an image-cleanup callback, records the digests of
    storages that already exist, and snapshots the dangling storage, label and manifest
    counts. The ``yield`` happens inside ``check_transitive_modifications()`` and the
    callback is always unregistered afterwards. The post-GC assertions then verify the
    dangling counts, the removal of reported images and of their unshared storages, the
    presence in the storage engine of CAS content that was not recorded up front, and the
    blob links of every tagged manifest.

    NOTE(review): presumably wrapped with ``contextlib.contextmanager`` at the definition
    site; the decorator is not visible from here -- confirm.

    Args:
        expect_storage_removed: accepted for callers; not read by this function.
    """
    # Collect every image passed to the cleanup callback; keep the returned remover so
    # the callback can be unregistered once GC has run.
    gc_removed_images = []
    unregister = model.config.register_image_cleanup_callback(gc_removed_images.extend)

    # Digests present before GC are skipped by the storage-engine check further down,
    # since they were likely created as test data.
    preexisting_digests = {
        row.content_checksum for row in ImageStorage.select() if row.cas_path
    }
    preexisting_digests.update(blob.digest for blob in ApprBlob.select())

    # Snapshot the dangling-object counts so they can be compared after GC.
    storages_before = _get_dangling_storage_count()
    labels_before = _get_dangling_label_count()
    manifests_before = _get_dangling_manifest_count()

    # Hand control to the GC test.
    with check_transitive_modifications():
        try:
            yield
        finally:
            unregister()

    # GC must leave the number of dangling storages, labels and manifests untouched.
    assert _get_dangling_storage_count() == storages_before
    assert _get_dangling_label_count() == labels_before, _get_dangling_labels()
    assert _get_dangling_manifest_count() == manifests_before

    # Each image reported through the callback must no longer be present in the database.
    for gone in gc_removed_images:
        assert isinstance(gone, Image)

        # NOTE: SQLite can and will reuse AUTOINCREMENT IDs occasionally, so a row with the
        # same ID is tolerated as long as it carries a different Docker Image ID.
        # See: https://www.sqlite.org/autoinc.html
        try:
            lookalike = Image.get(id=gone.id)
        except Image.DoesNotExist:
            pass
        else:
            assert (
                lookalike.docker_image_id != gone.docker_image_id
            ), "Found unexpected removed image %s under repo %s" % (
                lookalike.id,
                lookalike.repository,
            )

        # A storage is allowed to survive while another image, a manifest blob or an
        # uploaded blob (checked in that order) still points at it.
        storage_id = gone.storage_id
        still_referenced = (
            Image.select().where(Image.storage == storage_id).count()
            or ManifestBlob.select().where(ManifestBlob.blob == storage_id).count()
            or UploadedBlob.select().where(UploadedBlob.blob == storage_id).count()
        )
        if still_referenced:
            continue

        with pytest.raises(ImageStorage.DoesNotExist):
            ImageStorage.get(id=storage_id)

        with pytest.raises(ImageStorage.DoesNotExist):
            ImageStorage.get(uuid=gone.storage.uuid)

    # CAS content that was not recorded before GC must be readable from the preferred
    # storage location.
    location = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        is_new = storage_row.content_checksum not in preexisting_digests
        if is_new and storage_row.cas_path:
            cas_path = storage.blob_path(storage_row.content_checksum)
            storage.get_content({location}, cas_path)

    for appr_blob in ApprBlob.select():
        if appr_blob.digest not in preexisting_digests:
            appr_path = storage.blob_path(appr_blob.digest)
            storage.get_content({location}, appr_path)

    # The blobs linked to each tagged manifest must match the manifest's local blob digests.
    tagged_manifests = {tag.manifest for tag in Tag.select()}
    for manifest in tagged_manifests:
        linked_rows = ManifestBlob.select().where(ManifestBlob.manifest == manifest)
        linked_digests = {row.blob.content_checksum for row in linked_rows}

        raw_bytes = Bytes.for_string_or_unicode(manifest.manifest_bytes)
        parsed = parse_manifest_from_bytes(raw_bytes, manifest.media_type.name)
        assert set(parsed.local_blob_digests) == linked_digests
def test_get_or_create_manifest_with_remote_layers(initialized_db):
    """
    Check how ``get_or_create_manifest`` stores a schema 2 manifest with a remote layer.

    The manifest references a config blob, one local layer and one layer that is only
    given by URL. After it is written, the created row must mirror the manifest's
    metadata, no legacy image may be associated with it, and only the config and local
    layer blobs may be linked -- never the remote digest.

    Args:
        initialized_db: not referenced in the body (presumably the fixture that prepares
            the test database -- confirm).
    """
    repo = create_repository("devtable", "newrepo", None)

    history_entry = {
        "created": "2018-04-03T18:37:09.284840891Z",
        "created_by": "do something",
    }
    config_json = json.dumps(
        {
            "config": {},
            "rootfs": {"type": "layers", "diff_ids": []},
            "history": [dict(history_entry) for _ in range(2)],
        }
    )

    # Store the config blob, then a small blob standing in for a local layer.
    _, config_digest = _populate_blob(config_json)

    local_layer_data = "hello world"
    _, local_digest = _populate_blob(local_layer_data)

    # The remote layer is referenced by digest and URL only; no blob is stored for it.
    remote_digest = sha256_digest(b"something")

    config_size = len(config_json.encode("utf-8"))
    local_layer_size = len(local_layer_data.encode("utf-8"))

    builder = DockerSchema2ManifestBuilder()
    builder.set_config_digest(config_digest, config_size)
    builder.add_layer(remote_digest, 1234, urls=["http://hello/world"])
    builder.add_layer(local_digest, local_layer_size)
    manifest = builder.build()

    # The remote digest is part of the manifest but is not counted as a local blob.
    assert remote_digest in manifest.blob_digests
    assert remote_digest not in manifest.local_blob_digests

    assert manifest.has_remote_layer
    assert not manifest.has_legacy_image
    assert manifest.get_schema1_manifest("foo", "bar", "baz", None) is None

    # Persist the manifest and compare the stored row with the built manifest.
    result = get_or_create_manifest(repo, manifest, storage)
    assert result is not None

    stored = result.manifest
    assert stored
    assert stored.media_type.name == manifest.media_type
    assert stored.digest == manifest.digest
    assert stored.config_media_type == manifest.config_media_type
    assert stored.layers_compressed_size == manifest.layers_compressed_size

    # No legacy image is expected for this manifest.
    assert get_legacy_image_for_manifest(stored) is None

    # Only the locally stored blobs may be linked to the manifest.
    linked_rows = ManifestBlob.select().where(ManifestBlob.manifest == stored)
    linked_digests = {row.blob.content_checksum for row in linked_rows}

    assert local_digest in linked_digests
    assert config_digest in linked_digests
    assert remote_digest not in linked_digests
def test_retriever(initialized_db):
    """
    Verify that ``RepositoryContentRetriever`` serves a stored manifest and all its blobs.

    A schema 2 manifest with a config blob and two local layers is written with
    ``get_or_create_manifest``. Image rows pointing at the manifest's blobs are then
    deleted when all of their repository tags are hidden (which includes images with no
    tags at all). Afterwards the retriever must still return the manifest bytes and
    non-``None`` bytes for every linked blob digest.

    Args:
        initialized_db: not referenced in the body (presumably the fixture that prepares
            the test database -- confirm).
    """
    repository = create_repository("devtable", "newrepo", None)

    layer_json = json.dumps(
        {
            "config": {},
            "rootfs": {"type": "layers", "diff_ids": []},
            "history": [
                {
                    "created": "2018-04-03T18:37:09.284840891Z",
                    "created_by": "do something",
                },
                {
                    "created": "2018-04-03T18:37:09.284840891Z",
                    "created_by": "do something",
                },
            ],
        }
    )

    # Add a blob containing the config.
    _, config_digest = _populate_blob(layer_json)

    # Add a blob of random data.
    random_data = "hello world"
    _, random_digest = _populate_blob(random_data)

    # Add another blob of random data.
    other_random_data = "hi place"
    _, other_random_digest = _populate_blob(other_random_data)

    builder = DockerSchema2ManifestBuilder()
    builder.set_config_digest(config_digest, len(layer_json.encode("utf-8")))
    builder.add_layer(other_random_digest, len(other_random_data.encode("utf-8")))
    builder.add_layer(random_digest, len(random_data.encode("utf-8")))
    manifest = builder.build()

    # Every blob in this manifest is local, so it must appear in both digest collections.
    assert config_digest in manifest.blob_digests
    assert random_digest in manifest.blob_digests
    assert other_random_digest in manifest.blob_digests

    assert config_digest in manifest.local_blob_digests
    assert random_digest in manifest.local_blob_digests
    assert other_random_digest in manifest.local_blob_digests

    # Write the manifest.
    created_tuple = get_or_create_manifest(repository, manifest, storage)
    assert created_tuple is not None

    created_manifest = created_tuple.manifest
    assert created_manifest
    assert created_manifest.media_type.name == manifest.media_type
    assert created_manifest.digest == manifest.digest

    # Verify the linked blobs.
    blob_digests = {
        mb.blob.content_checksum
        for mb in ManifestBlob.select().where(ManifestBlob.manifest == created_manifest)
    }

    assert random_digest in blob_digests
    assert other_random_digest in blob_digests
    assert config_digest in blob_digests

    # Delete any Image rows linking to the blobs from temp tags.
    for blob_digest in blob_digests:
        storage_row = ImageStorage.get(content_checksum=blob_digest)

        # Materialize the query first: rows are deleted while walking the result.
        for image in list(Image.select().where(Image.storage == storage_row)):
            all_temp = all(
                rt.hidden
                for rt in RepositoryTag.select().where(RepositoryTag.image == image)
            )
            if all_temp:
                RepositoryTag.delete().where(RepositoryTag.image == image).execute()
                image.delete_instance(recursive=True)

    # Verify the blobs in the retriever.
    retriever = RepositoryContentRetriever(repository, storage)
    assert (
        retriever.get_manifest_bytes_with_digest(created_manifest.digest)
        == manifest.bytes.as_encoded_str()
    )

    for blob_digest in blob_digests:
        assert retriever.get_blob_bytes_with_digest(blob_digest) is not None
def test_get_or_create_manifest(schema_version, initialized_db):
    """
    Verify that ``get_or_create_manifest`` creates a manifest once and reuses it afterwards.

    A manifest of the requested schema version is built on top of a legacy image, a config
    blob and one layer blob. The first call must report a newly created row that matches
    the manifest, expose the config labels as ``labels_to_apply``, be referenced by a
    hidden tag with an end time, map to the legacy image and link the expected blobs. The
    second call must return the same row without creating it again, and the labels must
    then be listed on the manifest.

    Args:
        schema_version: Docker manifest schema to build; 1 and 2 are supported
            (presumably supplied by a parametrize decorator outside this view -- confirm).
        initialized_db: not referenced in the body (presumably the fixture that prepares
            the test database -- confirm).

    Raises:
        ValueError: if ``schema_version`` is neither 1 nor 2.
    """
    repository = create_repository('devtable', 'newrepo', None)

    expected_labels = {
        'Foo': 'Bar',
        'Baz': 'Meh',
    }

    layer_json = json.dumps({
        'id': 'somelegacyid',
        'config': {
            'Labels': expected_labels,
        },
        "rootfs": {"type": "layers", "diff_ids": []},
        "history": [
            {
                "created": "2018-04-03T18:37:09.284840891Z",
                "created_by": "do something",
            },
        ],
    })

    # Create a legacy image.
    find_create_or_link_image('somelegacyid', repository, 'devtable', {}, 'local_us')

    # Add a blob containing the config.
    _, config_digest = _populate_blob(layer_json)

    # Add a blob of random data.
    random_data = 'hello world'
    _, random_digest = _populate_blob(random_data)

    # Build the manifest.
    if schema_version == 1:
        builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'anothertag')
        builder.add_layer(random_digest, layer_json)
        sample_manifest_instance = builder.build(docker_v2_signing_key)
    elif schema_version == 2:
        # Sizes are byte lengths of the encoded content, not character counts.
        builder = DockerSchema2ManifestBuilder()
        builder.set_config_digest(config_digest, len(layer_json.encode('utf-8')))
        builder.add_layer(random_digest, len(random_data.encode('utf-8')))
        sample_manifest_instance = builder.build()
    else:
        # Fail with a clear message instead of a NameError on the unbound manifest below.
        raise ValueError('Unsupported schema_version: %r' % (schema_version,))

    # Create a new manifest.
    created_manifest = get_or_create_manifest(repository, sample_manifest_instance, storage)
    created = created_manifest.manifest
    newly_created = created_manifest.newly_created

    assert newly_created
    assert created is not None
    assert created.media_type.name == sample_manifest_instance.media_type
    assert created.digest == sample_manifest_instance.digest
    assert created.manifest_bytes == sample_manifest_instance.bytes.as_encoded_str()
    assert created_manifest.labels_to_apply == expected_labels

    # Verify it has a temporary tag pointing to it.
    assert Tag.get(manifest=created, hidden=True).lifetime_end_ms

    # Verify the legacy image.
    legacy_image = get_legacy_image_for_manifest(created)
    assert legacy_image is not None
    assert legacy_image.storage.content_checksum == random_digest

    # Verify the linked blobs.
    blob_digests = [
        mb.blob.content_checksum
        for mb in ManifestBlob.select().where(ManifestBlob.manifest == created)
    ]

    assert random_digest in blob_digests
    if schema_version == 2:
        # Only schema 2 manifests reference the config as a separate blob.
        assert config_digest in blob_digests

    # Retrieve it again and ensure it is the same manifest.
    created_manifest2 = get_or_create_manifest(repository, sample_manifest_instance, storage)
    created2 = created_manifest2.manifest
    newly_created2 = created_manifest2.newly_created

    assert not newly_created2
    assert created2 == created

    # Ensure it again has a temporary tag.
    assert Tag.get(manifest=created2, hidden=True).lifetime_end_ms

    # Ensure the labels were added.
    labels = list(list_manifest_labels(created))
    assert len(labels) == 2

    labels_dict = {label.key: label.value for label in labels}
    assert labels_dict == expected_labels