def test_manifest_v2_shared_config_and_blobs(app, default_tag_policy):
    """
    Two V2 manifests share their config blob and at least one layer blob; deleting the tag
    of one of them and running GC must leave every blob of the other manifest in place.
    """
    repo = model.repository.create_repository("devtable", "newrepo", None)

    manifest1, built1 = create_manifest_for_testing(
        repo,
        differentiation_field="1",
        include_shared_blob=True,
    )
    manifest2, built2 = create_manifest_for_testing(
        repo,
        differentiation_field="2",
        include_shared_blob=True,
    )

    # Sanity-check the fixture: some blob overlap and an identical config.
    common_digests = set(built1.local_blob_digests) & set(built2.local_blob_digests)
    assert common_digests
    assert built1.config.digest == built2.config.digest

    # Point one tag at each manifest.
    model.oci.tag.retarget_tag("tag1", manifest1)
    model.oci.tag.retarget_tag("tag2", manifest2)

    # Drop the second tag and collect garbage under the integrity checks.
    with assert_gc_integrity(expect_storage_removed=True):
        model.oci.tag.delete_tag(repo, "tag2")
        assert gc_now(repo)

    # Every blob referenced by the surviving manifest must still have a row with a CAS
    # path and be readable from the preferred storage location.
    location = storage.preferred_locations[0]
    for digest in built1.local_blob_digests:
        row = ImageStorage.get(content_checksum=digest)
        assert row.cas_path

        blob_path = storage.blob_path(row.content_checksum)
        storage.get_content({location}, blob_path)
def test_manifest_backfill_broken_tag(clear_rows, initialized_db):
    """
    Backfilling must succeed for a tag whose image lists an ancestor that does not exist.
    """
    # Clear out the existing tag manifests (labels first) so the tag can be reused.
    TagManifestLabel.delete().execute()
    TagManifest.delete().execute()

    # Build an image whose ancestry string points at a missing parent, and tag it.
    repo = model.repository.get_repository("devtable", "gargantuan")
    image_with_missing_parent = Image.create(
        docker_image_id="foo",
        repository=repo,
        ancestors="/348723847234/",
        storage=ImageStorage.get(),
    )
    broken_tag = RepositoryTag.create(
        repository=repo,
        image=image_with_missing_parent,
        name="broken",
    )

    # Run the backfill for that tag.
    assert backfill_tag(broken_tag)

    # The legacy tag manifest must be mapped to a new-style manifest with the same bytes.
    legacy_manifest = TagManifest.get(tag=broken_tag)
    mapping = TagManifestToManifest.get(tag_manifest=legacy_manifest)
    new_manifest = mapping.manifest
    assert new_manifest.manifest_bytes == legacy_manifest.json_data

    # The mapped tag must carry the same name and point at that manifest.
    new_tag = TagToRepositoryTag.get(repository_tag=broken_tag).tag
    assert new_tag.name == "broken"
    assert new_tag.manifest == new_manifest
def get_or_create_shared_blob(digest, byte_data, storage):
    """
    Look up the ImageStorage row for `digest`; when there is none, create the row, write
    `byte_data` to the first preferred location of `storage` and record the placement.

    Intended *only* for globally accessible shared blobs, such as the special empty gzipped
    tar layer that Docker no longer pushes to us.

    If writing the content or creating the placement fails, the error is logged, the
    freshly created row is deleted and the exception is re-raised.
    """
    assert digest
    # isinstance() already rejects None, so this also covers the "not None" requirement.
    assert isinstance(byte_data, bytes)
    assert storage

    try:
        return ImageStorage.get(content_checksum=digest)
    except ImageStorage.DoesNotExist:
        location_name = storage.preferred_locations[0]
        location_row = ImageStorageLocation.get(name=location_name)
        blob_row = ImageStorage.create(
            image_size=len(byte_data),
            content_checksum=digest,
        )

        try:
            layer_path = storage_model.get_layer_path(blob_row)
            storage.put_content([location_name], layer_path, byte_data)
            ImageStoragePlacement.create(storage=blob_row, location=location_row)
        except BaseException:
            # Catch-all on purpose: whatever went wrong, roll the row back and re-raise.
            logger.exception("Exception when trying to write special layer %s", digest)
            blob_row.delete_instance()
            raise

        return blob_row
def get_shared_blob(digest):
    """
    Fetch the ImageStorage row whose content checksum is `digest` and which is not marked
    as uploading; yields None when no such row exists.

    Intended *only* for globally accessible shared blobs, such as the special empty gzipped
    tar layer that Docker no longer pushes to us.
    """
    assert digest

    try:
        shared_blob = ImageStorage.get(content_checksum=digest, uploading=False)
    except ImageStorage.DoesNotExist:
        shared_blob = None

    return shared_blob
def temp_link_blob(repository_id, blob_digest, link_expiration_s):
    """
    Create a temporary link from the repository to the blob record with the given digest.

    Returns the blob record, or None (without linking anything) when no record has that
    digest. The lookup and the link happen inside a single database transaction.
    """
    assert blob_digest

    with db_transaction():
        try:
            blob_record = ImageStorage.get(content_checksum=blob_digest)
        except ImageStorage.DoesNotExist:
            blob_record = None
        else:
            _temp_link_blob(repository_id, blob_record, link_expiration_s)

        return blob_record
def store_blob_record_and_temp_link_in_repo(
    repository_id,
    blob_digest,
    location_obj,
    byte_count,
    link_expiration_s,
    uncompressed_byte_count=None,
):
    """
    Ensure a blob record and a placement at `location_obj` exist for `blob_digest`, then
    temporarily link the record to the repository and return it.

    An existing record only has its missing sizes filled in: `image_size` when it is None,
    and `uncompressed_size` when it is None and `uncompressed_byte_count` was supplied.
    Everything runs inside one database transaction.
    """
    assert blob_digest
    assert byte_count is not None

    with db_transaction():
        try:
            blob_record = ImageStorage.get(content_checksum=blob_digest)

            # Backfill sizes that were never recorded; save only if something changed.
            dirty = False
            if blob_record.image_size is None:
                blob_record.image_size = byte_count
                dirty = True

            if blob_record.uncompressed_size is None and uncompressed_byte_count is not None:
                blob_record.uncompressed_size = uncompressed_byte_count
                dirty = True

            if dirty:
                blob_record.save()

            # Raises ImageStoragePlacement.DoesNotExist when the placement is missing,
            # which the second handler below turns into a create.
            ImageStoragePlacement.get(storage=blob_record, location=location_obj)
        except ImageStorage.DoesNotExist:
            # No record at all: create both the record and its placement.
            blob_record = ImageStorage.create(
                content_checksum=blob_digest,
                uploading=False,
                image_size=byte_count,
                uncompressed_size=uncompressed_byte_count,
            )
            ImageStoragePlacement.create(storage=blob_record, location=location_obj)
        except ImageStoragePlacement.DoesNotExist:
            # Record exists but is not yet placed at this location.
            ImageStoragePlacement.create(storage=blob_record, location=location_obj)

        _temp_link_blob(repository_id, blob_record, link_expiration_s)
        return blob_record
def _create_blob(self, digest: str, size: int, manifest_id: int, repo_id: int):
    """
    Get or create the ImageStorage row for `digest`, make sure a ManifestBlob row ties it
    to the given manifest and repository, and return the ImageStorage row.
    """
    try:
        blob_row = ImageStorage.get(content_checksum=digest)
    except ImageStorage.DoesNotExist:
        # TODO: which size should we really be setting here?
        blob_row = ImageStorage.create(
            content_checksum=digest,
            image_size=size,
            compressed_size=size,
        )

    # The same key identifies the link for both the lookup and the create.
    link_key = dict(manifest_id=manifest_id, blob=blob_row, repository_id=repo_id)
    try:
        ManifestBlob.get(**link_key)
    except ManifestBlob.DoesNotExist:
        ManifestBlob.create(**link_key)

    return blob_row
def done(self):
    """
    Mark the manifest builder as complete and dispose of its state.

    Calling this is optional; manifest builders are expected to eventually time out if
    unused for an extended period of time.
    """
    for storage_id in self._builder_state.temp_storages:
        try:
            temp_row = ImageStorage.get(id=storage_id)

            # Only rows still marked as uploading are removed, and never the shared
            # empty-layer blob.
            if not temp_row.uploading or temp_row.content_checksum == EMPTY_LAYER_BLOB_DIGEST:
                continue

            # Remove the placements that point at the row first, then the row itself.
            placements = ImageStoragePlacement.delete().where(
                ImageStoragePlacement.storage == temp_row
            )
            placements.execute()
            temp_row.delete_instance()
        except ImageStorage.DoesNotExist:
            # Row is already gone; nothing to clean up for this id.
            pass

    session.pop(_SESSION_KEY, None)
def assert_gc_integrity(expect_storage_removed=True):
    """
    Specialized assertion wrapped around a GC run: checks that GC leaves no new dangling
    storages, labels or manifests, that every image reported to the cleanup callback is
    really gone, and that storage is only removed once nothing references it.

    Written as a single-yield generator; the code under test runs at the `yield`.
    NOTE(review): presumably wrapped by a context-manager decorator outside this view.
    `expect_storage_removed` is accepted but not read by this body.
    """
    # Record every image passed to the cleanup callback while the wrapped code runs.
    removed_images = []
    unregister_callback = model.config.register_image_cleanup_callback(removed_images.extend)

    # Remember the digests that exist up front. They are not verified for existence
    # later because they were likely created as test data.
    preexisting_digests = {
        row.content_checksum for row in ImageStorage.select() if row.cas_path
    }
    preexisting_digests.update(blob.digest for blob in ApprBlob.select())

    # Baseline counts of dangling objects.
    storages_before = _get_dangling_storage_count()
    labels_before = _get_dangling_label_count()
    manifests_before = _get_dangling_manifest_count()

    # Hand control to the GC test; always unregister the callback afterwards.
    with check_transitive_modifications():
        try:
            yield
        finally:
            unregister_callback()

    # The number of dangling storages, labels and manifests must be unchanged.
    assert _get_dangling_storage_count() == storages_before
    assert _get_dangling_label_count() == labels_before, _get_dangling_labels()
    assert _get_dangling_manifest_count() == manifests_before

    # Each image reported as removed must be gone, together with its unshared storage.
    for removed in removed_images:
        assert isinstance(removed, Image)

        try:
            # NOTE: SQLite can and will reuse AUTOINCREMENT IDs occasionally, so a row with
            # the same ID is tolerated as long as its Docker Image ID differs.
            # See: https://www.sqlite.org/autoinc.html
            reused = Image.get(id=removed.id)
            assert reused.docker_image_id != removed.docker_image_id, (
                "Found unexpected removed image %s under repo %s"
                % (
                    reused.id,
                    reused.repository,
                )
            )
        except Image.DoesNotExist:
            pass

        # Storage may only disappear when no image, manifest blob or uploaded blob still
        # refers to it. The `or` chain stops at the first non-zero count.
        storage_id = removed.storage_id
        reference_count = (
            Image.select().where(Image.storage == storage_id).count()
            or ManifestBlob.select().where(ManifestBlob.blob == storage_id).count()
            or UploadedBlob.select().where(UploadedBlob.blob == storage_id).count()
        )
        if not reference_count:
            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(id=storage_id)

            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(uuid=removed.storage.uuid)

    # Every CAS blob that appeared after the baseline must be readable from the engine.
    location = storage.preferred_locations[0]
    for row in ImageStorage.select():
        if row.content_checksum not in preexisting_digests and row.cas_path:
            storage.get_content({location}, storage.blob_path(row.content_checksum))

    for blob in ApprBlob.select():
        if blob.digest not in preexisting_digests:
            storage.get_content({location}, storage.blob_path(blob.digest))

    # Every tagged manifest must be linked to exactly the local blobs it declares.
    for manifest in {tag.manifest for tag in Tag.select()}:
        linked_digests = {
            link.blob.content_checksum
            for link in ManifestBlob.select().where(ManifestBlob.manifest == manifest)
        }

        parsed = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(manifest.manifest_bytes),
            manifest.media_type.name,
        )
        assert set(parsed.local_blob_digests) == linked_digests
def assert_gc_integrity(expect_storage_removed=True, check_oci_tags=True):
    """
    Specialized assertion for ensuring that GC cleans up all dangling storages and labels,
    invokes the callback for images removed and doesn't invoke the callback for images *not*
    removed.

    Written as a single-yield generator; the code under test runs at the `yield`.
    NOTE(review): presumably wrapped by a context-manager decorator outside this view.

    Args:
        expect_storage_removed: accepted for caller compatibility; not read by this body.
        check_oci_tags: when true, also assert that every Tag row is referenced by a
            TagToRepositoryTag row.
    """
    # Add a callback for when images are removed, keeping whatever the registration
    # returns so the callback can be detached again once the wrapped code is done.
    removed_image_storages = []
    remove_callback = model.config.register_image_cleanup_callback(removed_image_storages.extend)

    # Store the number of dangling storages and labels.
    existing_storage_count = _get_dangling_storage_count()
    existing_label_count = _get_dangling_label_count()
    existing_manifest_count = _get_dangling_manifest_count()

    try:
        yield
    finally:
        # Detach the callback even when the wrapped code raises; otherwise it stays
        # registered and keeps appending to this list after the context has ended.
        # The callable() guard tolerates a registration API that returns no remover.
        if callable(remove_callback):
            remove_callback()

    # Ensure the number of dangling storages, manifests and labels has not changed.
    updated_storage_count = _get_dangling_storage_count()
    assert updated_storage_count == existing_storage_count

    updated_label_count = _get_dangling_label_count()
    assert updated_label_count == existing_label_count, _get_dangling_labels()

    updated_manifest_count = _get_dangling_manifest_count()
    assert updated_manifest_count == existing_manifest_count

    # Ensure that for each call to the image+storage cleanup callback, the image and its
    # storage is not found *anywhere* in the database.
    for removed_image_and_storage in removed_image_storages:
        with pytest.raises(Image.DoesNotExist):
            Image.get(id=removed_image_and_storage.id)

        # Ensure that image storages are only removed if not shared: first by another
        # image, then (only if no image shares it) by a manifest blob.
        shared = Image.select().where(Image.storage == removed_image_and_storage.storage_id).count()
        if shared == 0:
            shared = (
                ManifestBlob.select()
                .where(ManifestBlob.blob == removed_image_and_storage.storage_id)
                .count()
            )

        if shared == 0:
            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(id=removed_image_and_storage.storage_id)

            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(uuid=removed_image_and_storage.storage.uuid)

    # Ensure all CAS storage is in the storage engine.
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        if storage_row.cas_path:
            storage.get_content({preferred}, storage.blob_path(storage_row.content_checksum))

    for blob_row in ApprBlob.select():
        storage.get_content({preferred}, storage.blob_path(blob_row.digest))

    # Ensure there are no danglings OCI tags.
    if check_oci_tags:
        oci_tags = {t.id for t in Tag.select()}
        referenced_oci_tags = {t.tag_id for t in TagToRepositoryTag.select()}
        assert not oci_tags - referenced_oci_tags

    # Ensure all tags have valid manifests.
    for manifest in {t.manifest for t in Tag.select()}:
        # Ensure that the manifest's blobs all exist.
        found_blobs = {
            b.blob.content_checksum
            for b in ManifestBlob.select().where(ManifestBlob.manifest == manifest)
        }

        parsed = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(manifest.manifest_bytes), manifest.media_type.name
        )
        assert set(parsed.local_blob_digests) == found_blobs
def test_retriever(initialized_db):
    """
    Store a schema 2 manifest with a config blob and two layer blobs, remove the Image rows
    that only hidden tags point at, and check the content retriever still serves the
    manifest bytes and every linked blob.
    """
    repository = create_repository("devtable", "newrepo", None)

    # Config payload with two identical history entries.
    history = [
        {
            "created": "2018-04-03T18:37:09.284840891Z",
            "created_by": "do something",
        }
        for _ in range(2)
    ]
    layer_json = json.dumps(
        {
            "config": {},
            "rootfs": {"type": "layers", "diff_ids": []},
            "history": history,
        }
    )

    random_data = "hello world"
    other_random_data = "hi place"

    # Populate the config blob followed by the two data blobs.
    _, config_digest = _populate_blob(layer_json)
    _, random_digest = _populate_blob(random_data)
    _, other_random_digest = _populate_blob(other_random_data)

    remote_digest = sha256_digest(b"something")

    # Assemble the manifest: config first, then the layers in this order.
    builder = DockerSchema2ManifestBuilder()
    builder.set_config_digest(config_digest, len(layer_json.encode("utf-8")))
    for layer_digest, layer_data in (
        (other_random_digest, other_random_data),
        (random_digest, random_data),
    ):
        builder.add_layer(layer_digest, len(layer_data.encode("utf-8")))
    manifest = builder.build()

    # All three digests are reported both as blobs and as local blobs.
    populated_digests = (config_digest, random_digest, other_random_digest)
    for digest in populated_digests:
        assert digest in manifest.blob_digests
    for digest in populated_digests:
        assert digest in manifest.local_blob_digests

    # Write the manifest.
    created_tuple = get_or_create_manifest(repository, manifest, storage)
    assert created_tuple is not None

    created_manifest = created_tuple.manifest
    assert created_manifest
    assert created_manifest.media_type.name == manifest.media_type
    assert created_manifest.digest == manifest.digest

    # Verify the linked blobs.
    manifest_links = ManifestBlob.select().where(ManifestBlob.manifest == created_manifest)
    blob_digests = {link.blob.content_checksum for link in manifest_links}
    for digest in (random_digest, other_random_digest, config_digest):
        assert digest in blob_digests

    # Delete any Image rows linking to the blobs from temp tags: an image goes away
    # (with its tags) when every tag pointing at it is hidden.
    for blob_digest in blob_digests:
        storage_row = ImageStorage.get(content_checksum=blob_digest)
        images = list(Image.select().where(Image.storage == storage_row))
        for image in images:
            hidden_flags = [
                tag.hidden for tag in RepositoryTag.select().where(RepositoryTag.image == image)
            ]
            if all(hidden_flags):
                RepositoryTag.delete().where(RepositoryTag.image == image).execute()
                image.delete_instance(recursive=True)

    # Verify the blobs in the retriever.
    retriever = RepositoryContentRetriever(repository, storage)
    stored_bytes = retriever.get_manifest_bytes_with_digest(created_manifest.digest)
    assert stored_bytes == manifest.bytes.as_encoded_str()

    for blob_digest in blob_digests:
        assert retriever.get_blob_bytes_with_digest(blob_digest) is not None