def test_manifestbackfillworker_repeat_digest(clear_rows, initialized_db): """ Tests that a manifest with a shared digest will be properly linked. """ # Delete existing tag manifest so we can reuse the tag. TagManifestLabel.delete().execute() TagManifest.delete().execute() repo = model.repository.get_repository("devtable", "gargantuan") tag_v30 = model.tag.get_active_tag("devtable", "gargantuan", "v3.0") tag_v50 = model.tag.get_active_tag("devtable", "gargantuan", "v5.0") # Build a manifest and assign it to both tags (this is allowed in the old model). builder = DockerSchema1ManifestBuilder("devtable", "gargantuan", "sometag") builder.add_layer("sha256:deadbeef", '{"id": "foo"}') manifest = builder.build(docker_v2_signing_key) manifest_1 = TagManifest.create(json_data=manifest.bytes.as_encoded_str(), digest=manifest.digest, tag=tag_v30) manifest_2 = TagManifest.create(json_data=manifest.bytes.as_encoded_str(), digest=manifest.digest, tag=tag_v50) # Backfill "both" manifests and ensure both are pointed to by a single resulting row. assert _backfill_manifest(manifest_1) assert _backfill_manifest(manifest_2) map_row1 = TagManifestToManifest.get(tag_manifest=manifest_1) map_row2 = TagManifestToManifest.get(tag_manifest=manifest_2) assert map_row1.manifest == map_row2.manifest
def delete_manifests(): ManifestLegacyImage.delete().execute() ManifestBlob.delete().execute() Manifest.delete().execute() TagManifestToManifest.delete().execute() TagManifest.delete().execute() return "OK"
def _associate_manifest(tag, oci_manifest): with db_transaction(): tag_manifest = TagManifest.create( tag=tag, digest=oci_manifest.digest, json_data=oci_manifest.manifest_bytes) TagManifestToManifest.create(tag_manifest=tag_manifest, manifest=oci_manifest) return tag_manifest
def clear_rows(initialized_db): # Remove all new-style rows so we can backfill. TagToRepositoryTag.delete().execute() Tag.delete().execute() TagManifestLabelMap.delete().execute() ManifestLabel.delete().execute() ManifestBlob.delete().execute() ManifestLegacyImage.delete().execute() TagManifestToManifest.delete().execute() Manifest.delete().execute()
def _get_manifest_id(repositorytag): repository_tag_datatype = TagDataType.for_repository_tag(repositorytag) # Retrieve the TagManifest for the RepositoryTag, backfilling if necessary. with db_transaction(): manifest_datatype = None try: manifest_datatype = pre_oci_model.get_manifest_for_tag( repository_tag_datatype, backfill_if_necessary=True) except MalformedSchema1Manifest: logger.exception('Error backfilling manifest for tag `%s`', repositorytag.id) if manifest_datatype is None: logger.error('Could not load or backfill manifest for tag `%s`', repositorytag.id) # Create a broken manifest for the tag. tag_manifest = TagManifest.create(tag=repositorytag, digest='BROKEN-%s' % repositorytag.id, json_data='{}') else: # Retrieve the new-style Manifest for the TagManifest, if any. try: tag_manifest = TagManifest.get(id=manifest_datatype._db_id) except TagManifest.DoesNotExist: logger.exception('Could not find tag manifest') return None try: found = TagManifestToManifest.get(tag_manifest=tag_manifest).manifest # Verify that the new-style manifest has the same contents as the old-style manifest. # If not, update and then return. This is an extra check put in place to ensure unicode # manifests have been correctly copied. if found.manifest_bytes != tag_manifest.json_data: logger.warning('Fixing manifest `%s`', found.id) found.manifest_bytes = tag_manifest.json_data found.save() return found.id except TagManifestToManifest.DoesNotExist: # Could not find the new style manifest, so backfill. _backfill_manifest(tag_manifest) # Try to retrieve the manifest again, since we've performed a backfill. try: return TagManifestToManifest.get(tag_manifest=tag_manifest).manifest_id except TagManifestToManifest.DoesNotExist: return None
def test_manifest_backfill_broken_tag(clear_rows, initialized_db): """ Tests backfilling a broken tag. """ # Delete existing tag manifest so we can reuse the tag. TagManifestLabel.delete().execute() TagManifest.delete().execute() # Create a tag with an image referenced missing parent images. repo = model.repository.get_repository("devtable", "gargantuan") broken_image = Image.create( docker_image_id="foo", repository=repo, ancestors="/348723847234/", storage=ImageStorage.get(), ) broken_image_tag = RepositoryTag.create(repository=repo, image=broken_image, name="broken") # Backfill the tag. assert backfill_tag(broken_image_tag) # Ensure we backfilled, even though we reference a broken manifest. tag_manifest = TagManifest.get(tag=broken_image_tag) map_row = TagManifestToManifest.get(tag_manifest=tag_manifest) manifest = map_row.manifest assert manifest.manifest_bytes == tag_manifest.json_data tag = TagToRepositoryTag.get(repository_tag=broken_image_tag).tag assert tag.name == "broken" assert tag.manifest == manifest
def test_store_tag_manifest(get_storages, initialized_db): # Create a manifest with some layers. builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'sometag') storages = get_storages() assert storages repo = model.repository.get_repository('devtable', 'simple') storage_id_map = {} for index, storage in enumerate(storages): image_id = 'someimage%s' % index builder.add_layer(storage.content_checksum, json.dumps({'id': image_id})) find_create_or_link_image(image_id, repo, 'devtable', {}, 'local_us') storage_id_map[storage.content_checksum] = storage.id manifest = builder.build(docker_v2_signing_key) tag_manifest, _ = store_tag_manifest_for_testing('devtable', 'simple', 'sometag', manifest, manifest.leaf_layer_v1_image_id, storage_id_map) # Ensure we have the new-model expected rows. mapping_row = TagManifestToManifest.get(tag_manifest=tag_manifest) assert mapping_row.manifest is not None assert mapping_row.manifest.manifest_bytes == manifest.bytes.as_encoded_str() assert mapping_row.manifest.digest == str(manifest.digest) blob_rows = {m.blob_id for m in ManifestBlob.select().where(ManifestBlob.manifest == mapping_row.manifest)} assert blob_rows == {s.id for s in storages} assert ManifestLegacyImage.get(manifest=mapping_row.manifest).image == tag_manifest.tag.image
def test_manifestbackfillworker_mislinked_manifest(clear_rows, initialized_db): """ Tests that a manifest whose image is mislinked will have its storages relinked properly. """ # Delete existing tag manifest so we can reuse the tag. TagManifestLabel.delete().execute() TagManifest.delete().execute() repo = model.repository.get_repository('devtable', 'complex') tag_v30 = model.tag.get_active_tag('devtable', 'gargantuan', 'v3.0') tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0') # Add a mislinked manifest, by having its layer point to a blob in v3.0 but its image # be the v5.0 image. builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag') builder.add_layer(tag_v30.image.storage.content_checksum, '{"id": "foo"}') manifest = builder.build(docker_v2_signing_key) mislinked_manifest = TagManifest.create(json_data=manifest.bytes.as_encoded_str(), digest=manifest.digest, tag=tag_v50) # Backfill the manifest and ensure its proper content checksum was linked. assert _backfill_manifest(mislinked_manifest) map_row = TagManifestToManifest.get(tag_manifest=mislinked_manifest) assert not map_row.broken manifest_row = map_row.manifest legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image assert legacy_image == tag_v50.image manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row)) assert len(manifest_blobs) == 1 assert manifest_blobs[0].blob.content_checksum == tag_v30.image.storage.content_checksum
def test_manifestbackfillworker_mislinked_invalid_manifest(clear_rows, initialized_db): """ Tests that a manifest whose image is mislinked will attempt to have its storages relinked properly. """ # Delete existing tag manifest so we can reuse the tag. TagManifestLabel.delete().execute() TagManifest.delete().execute() repo = model.repository.get_repository("devtable", "complex") tag_v50 = model.tag.get_active_tag("devtable", "gargantuan", "v5.0") # Add a mislinked manifest, by having its layer point to an invalid blob but its image # be the v5.0 image. builder = DockerSchema1ManifestBuilder("devtable", "gargantuan", "sometag") builder.add_layer("sha256:deadbeef", '{"id": "foo"}') manifest = builder.build(docker_v2_signing_key) broken_manifest = TagManifest.create( json_data=manifest.bytes.as_encoded_str(), digest=manifest.digest, tag=tag_v50 ) # Backfill the manifest and ensure it is marked as broken. assert _backfill_manifest(broken_manifest) map_row = TagManifestToManifest.get(tag_manifest=broken_manifest) assert map_row.broken manifest_row = map_row.manifest legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image assert legacy_image == tag_v50.image manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row)) assert len(manifest_blobs) == 0
def create_manifest_label(tag_manifest, key, value, source_type_name, media_type_name=None): """ Creates a new manifest label on a specific tag manifest. """ if not key: raise InvalidLabelKeyException("Missing key on label") # Note that we don't prevent invalid label names coming from the manifest to be stored, as Docker # does not currently prevent them from being put into said manifests. if not validate_label_key(key) and source_type_name != "manifest": raise InvalidLabelKeyException( "Label key `%s` is invalid or reserved" % key) # Find the matching media type. If none specified, we infer. if media_type_name is None: media_type_name = "text/plain" if is_json(value): media_type_name = "application/json" media_type_id = _get_media_type_id(media_type_name) if media_type_id is None: raise InvalidMediaTypeException() source_type_id = _get_label_source_type_id(source_type_name) with db_transaction(): label = Label.create(key=key, value=value, source_type=source_type_id, media_type=media_type_id) tag_manifest_label = TagManifestLabel.create( annotated=tag_manifest, label=label, repository=tag_manifest.tag.repository) try: mapping_row = TagManifestToManifest.get(tag_manifest=tag_manifest) if mapping_row.manifest: manifest_label = ManifestLabel.create( manifest=mapping_row.manifest, label=label, repository=tag_manifest.tag.repository, ) TagManifestLabelMap.create( manifest_label=manifest_label, tag_manifest_label=tag_manifest_label, label=label, manifest=mapping_row.manifest, tag_manifest=tag_manifest, ) except TagManifestToManifest.DoesNotExist: pass return label
def _garbage_collect_legacy_manifest(legacy_manifest_id, context): assert legacy_manifest_id is not None # Add the labels to be GCed. query = TagManifestLabel.select().where( TagManifestLabel.annotated == legacy_manifest_id) for manifest_label in query: context.add_label_id(manifest_label.label_id) # Delete the tag manifest. with db_transaction(): try: tag_manifest = TagManifest.select().where( TagManifest.id == legacy_manifest_id).get() except TagManifest.DoesNotExist: return False assert tag_manifest.id == legacy_manifest_id assert tag_manifest.tag.repository_id == context.repository.id # Delete any label mapping rows. (TagManifestLabelMap.delete().where( TagManifestLabelMap.tag_manifest == legacy_manifest_id).execute()) # Delete the label rows. TagManifestLabel.delete().where( TagManifestLabel.annotated == legacy_manifest_id).execute() # Delete the mapping row if it exists. try: tmt = (TagManifestToManifest.select().where( TagManifestToManifest.tag_manifest == tag_manifest).get()) context.add_manifest_id(tmt.manifest_id) tmt_deleted = tmt.delete_instance() if tmt_deleted: gc_table_rows_deleted.labels( table="TagManifestToManifest").inc() except TagManifestToManifest.DoesNotExist: pass # Delete the tag manifest. tag_manifest_deleted = tag_manifest.delete_instance() if tag_manifest_deleted: gc_table_rows_deleted.labels(table="TagManifest").inc() return True
def backfill_label(tag_manifest_label): logger.info("Backfilling label %s", tag_manifest_label.id) # Ensure that a mapping row doesn't already exist. If it does, we've been preempted. if lookup_map_row(tag_manifest_label): return False # Ensure the tag manifest has been backfilled into the manifest table. try: tmt = TagManifestToManifest.get( tag_manifest=tag_manifest_label.annotated) except TagManifestToManifest.DoesNotExist: # We'll come back to this later. logger.debug( "Tag Manifest %s for label %s has not yet been backfilled", tag_manifest_label.annotated.id, tag_manifest_label.id, ) return True repository = tag_manifest_label.repository # Create the new mapping entry and label. with db_transaction(): if lookup_map_row(tag_manifest_label): return False label = tag_manifest_label.label if tmt.manifest: try: manifest_label = ManifestLabel.create(manifest=tmt.manifest, label=label, repository=repository) TagManifestLabelMap.create( manifest_label=manifest_label, tag_manifest_label=tag_manifest_label, label=label, manifest=tmt.manifest, tag_manifest=tag_manifest_label.annotated, ) except IntegrityError: return False logger.info("Backfilled label %s", tag_manifest_label.id) return True
def test_retarget_tag(initialized_db): repo = get_repository("devtable", "history") results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name="latest") assert len(results) == 2 assert results[0].lifetime_end_ms is None assert results[1].lifetime_end_ms is not None # Revert back to the original manifest. created = retarget_tag("latest", results[0].manifest, is_reversion=True, now_ms=results[1].lifetime_end_ms + 10000) assert created.lifetime_end_ms is None assert created.reversion assert created.name == "latest" assert created.manifest == results[0].manifest # Verify in the history. results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name="latest") assert len(results) == 3 assert results[0].lifetime_end_ms is None assert results[1].lifetime_end_ms is not None assert results[2].lifetime_end_ms is not None assert results[0] == created # Verify old-style tables. repository_tag = TagToRepositoryTag.get(tag=created).repository_tag assert repository_tag.lifetime_start_ts == int(created.lifetime_start_ms / 1000) tag_manifest = TagManifest.get(tag=repository_tag) assert TagManifestToManifest.get( tag_manifest=tag_manifest).manifest == created.manifest
def test_manifestbackfillworker_broken_manifest(clear_rows, initialized_db): # Delete existing tag manifest so we can reuse the tag. TagManifestLabel.delete().execute() TagManifest.delete().execute() # Add a broken manifest. broken_manifest = TagManifest.create(json_data='wat?', digest='sha256:foobar', tag=RepositoryTag.get()) # Ensure the backfill works. assert _backfill_manifest(broken_manifest) # Ensure the mapping is marked as broken. map_row = TagManifestToManifest.get(tag_manifest=broken_manifest) assert map_row.broken manifest_row = map_row.manifest assert manifest_row.manifest_bytes == broken_manifest.json_data assert manifest_row.digest == broken_manifest.digest assert manifest_row.repository == broken_manifest.tag.repository legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image assert broken_manifest.tag.image == legacy_image
def _garbage_collect_manifest(manifest_id, context): assert manifest_id is not None # Make sure the manifest isn't referenced. if _check_manifest_used(manifest_id): return False # Add the manifest's blobs to the context to be GCed. for manifest_blob in ManifestBlob.select().where(ManifestBlob.manifest == manifest_id): context.add_blob_id(manifest_blob.blob_id) # Retrieve the manifest's associated image, if any. try: legacy_image_id = ManifestLegacyImage.get(manifest=manifest_id).image_id context.add_legacy_image_id(legacy_image_id) except ManifestLegacyImage.DoesNotExist: legacy_image_id = None # Add child manifests to be GCed. for connector in ManifestChild.select().where(ManifestChild.manifest == manifest_id): context.add_manifest_id(connector.child_manifest_id) # Add the labels to be GCed. for manifest_label in ManifestLabel.select().where(ManifestLabel.manifest == manifest_id): context.add_label_id(manifest_label.label_id) # Delete the manifest. with db_transaction(): try: manifest = Manifest.select().where(Manifest.id == manifest_id).get() except Manifest.DoesNotExist: return False assert manifest.id == manifest_id assert manifest.repository_id == context.repository.id if _check_manifest_used(manifest_id): return False # Delete any label mappings. (TagManifestLabelMap .delete() .where(TagManifestLabelMap.manifest == manifest_id) .execute()) # Delete any mapping rows for the manifest. TagManifestToManifest.delete().where(TagManifestToManifest.manifest == manifest_id).execute() # Delete any label rows. ManifestLabel.delete().where(ManifestLabel.manifest == manifest_id, ManifestLabel.repository == context.repository).execute() # Delete any child manifest rows. ManifestChild.delete().where(ManifestChild.manifest == manifest_id, ManifestChild.repository == context.repository).execute() # Delete the manifest blobs for the manifest. ManifestBlob.delete().where(ManifestBlob.manifest == manifest_id, ManifestBlob.repository == context.repository).execute() # Delete the manifest legacy image row. if legacy_image_id: (ManifestLegacyImage .delete() .where(ManifestLegacyImage.manifest == manifest_id, ManifestLegacyImage.repository == context.repository) .execute()) # Delete the manifest. manifest.delete_instance() context.mark_manifest_removed(manifest) return True
def _get_dangling_manifest_count(): manifest_ids = set([current.id for current in Manifest.select()]) referenced_by_tag_manifest = set( [tmt.manifest_id for tmt in TagManifestToManifest.select()]) return len(manifest_ids - referenced_by_tag_manifest)
def retarget_tag(tag_name, manifest_id, is_reversion=False, now_ms=None, adjust_old_model=True): """ Creates or updates a tag with the specified name to point to the given manifest under its repository. If this action is a reversion to a previous manifest, is_reversion should be set to True. Returns the newly created tag row or None on error. """ try: manifest = (Manifest.select( Manifest, MediaType).join(MediaType).where(Manifest.id == manifest_id).get()) except Manifest.DoesNotExist: return None # CHECK: Make sure that we are not mistargeting a schema 1 manifest to a tag with a different # name. if manifest.media_type.name in DOCKER_SCHEMA1_CONTENT_TYPES: try: parsed = DockerSchema1Manifest(Bytes.for_string_or_unicode( manifest.manifest_bytes), validate=False) if parsed.tag != tag_name: logger.error( "Tried to re-target schema1 manifest with tag `%s` to tag `%s", parsed.tag, tag_name, ) return None except MalformedSchema1Manifest: logger.exception("Could not parse schema1 manifest") return None legacy_image = get_legacy_image_for_manifest(manifest) now_ms = now_ms or get_epoch_timestamp_ms() now_ts = int(now_ms / 1000) with db_transaction(): # Lookup an existing tag in the repository with the same name and, if present, mark it # as expired. existing_tag = get_tag(manifest.repository_id, tag_name) if existing_tag is not None: _, okay = set_tag_end_ms(existing_tag, now_ms) # TODO: should we retry here and/or use a for-update? if not okay: return None # Create a new tag pointing to the manifest with a lifetime start of now. created = Tag.create( name=tag_name, repository=manifest.repository_id, lifetime_start_ms=now_ms, reversion=is_reversion, manifest=manifest, tag_kind=Tag.tag_kind.get_id("tag"), ) # TODO: Remove the linkage code once RepositoryTag is gone. # If this is a schema 1 manifest, then add a TagManifest linkage to it. Otherwise, it will only # be pullable via the new OCI model. if adjust_old_model: if (manifest.media_type.name in DOCKER_SCHEMA1_CONTENT_TYPES and legacy_image is not None): old_style_tag = RepositoryTag.create( repository=manifest.repository_id, image=legacy_image, name=tag_name, lifetime_start_ts=now_ts, reversion=is_reversion, ) TagToRepositoryTag.create(tag=created, repository_tag=old_style_tag, repository=manifest.repository_id) tag_manifest = TagManifest.create( tag=old_style_tag, digest=manifest.digest, json_data=manifest.manifest_bytes) TagManifestToManifest.create(tag_manifest=tag_manifest, manifest=manifest, repository=manifest.repository_id) return created
def _garbage_collect_manifest(manifest_id, context): assert manifest_id is not None # Make sure the manifest isn't referenced. if _check_manifest_used(manifest_id): return False # Add the manifest's blobs to the context to be GCed. for manifest_blob in ManifestBlob.select().where( ManifestBlob.manifest == manifest_id): context.add_blob_id(manifest_blob.blob_id) # Retrieve the manifest's associated image, if any. try: legacy_image_id = ManifestLegacyImage.get( manifest=manifest_id).image_id context.add_legacy_image_id(legacy_image_id) except ManifestLegacyImage.DoesNotExist: legacy_image_id = None # Add child manifests to be GCed. for connector in ManifestChild.select().where( ManifestChild.manifest == manifest_id): context.add_manifest_id(connector.child_manifest_id) # Add the labels to be GCed. for manifest_label in ManifestLabel.select().where( ManifestLabel.manifest == manifest_id): context.add_label_id(manifest_label.label_id) # Delete the manifest. with db_transaction(): try: manifest = Manifest.select().where( Manifest.id == manifest_id).get() except Manifest.DoesNotExist: return False assert manifest.id == manifest_id assert manifest.repository_id == context.repository.id if _check_manifest_used(manifest_id): return False # Delete any label mappings. deleted_tag_manifest_label_map = (TagManifestLabelMap.delete().where( TagManifestLabelMap.manifest == manifest_id).execute()) # Delete any mapping rows for the manifest. deleted_tag_manifest_to_manifest = ( TagManifestToManifest.delete().where( TagManifestToManifest.manifest == manifest_id).execute()) # Delete any label rows. deleted_manifest_label = (ManifestLabel.delete().where( ManifestLabel.manifest == manifest_id, ManifestLabel.repository == context.repository, ).execute()) # Delete any child manifest rows. deleted_manifest_child = (ManifestChild.delete().where( ManifestChild.manifest == manifest_id, ManifestChild.repository == context.repository, ).execute()) # Delete the manifest blobs for the manifest. deleted_manifest_blob = (ManifestBlob.delete().where( ManifestBlob.manifest == manifest_id, ManifestBlob.repository == context.repository).execute()) # Delete the security status for the manifest deleted_manifest_security = (ManifestSecurityStatus.delete().where( ManifestSecurityStatus.manifest == manifest_id, ManifestSecurityStatus.repository == context.repository, ).execute()) # Delete the manifest legacy image row. deleted_manifest_legacy_image = 0 if legacy_image_id: deleted_manifest_legacy_image = ( ManifestLegacyImage.delete().where( ManifestLegacyImage.manifest == manifest_id, ManifestLegacyImage.repository == context.repository, ).execute()) # Delete the manifest. manifest.delete_instance() context.mark_manifest_removed(manifest) gc_table_rows_deleted.labels( table="TagManifestLabelMap").inc(deleted_tag_manifest_label_map) gc_table_rows_deleted.labels( table="TagManifestToManifest").inc(deleted_tag_manifest_to_manifest) gc_table_rows_deleted.labels( table="ManifestLabel").inc(deleted_manifest_label) gc_table_rows_deleted.labels( table="ManifestChild").inc(deleted_manifest_child) gc_table_rows_deleted.labels( table="ManifestBlob").inc(deleted_manifest_blob) gc_table_rows_deleted.labels( table="ManifestSecurityStatus").inc(deleted_manifest_security) if deleted_manifest_legacy_image: gc_table_rows_deleted.labels( table="ManifestLegacyImage").inc(deleted_manifest_legacy_image) gc_table_rows_deleted.labels(table="Manifest").inc() return True
def test_tagbackfillworker(clear_all_rows, initialized_db): # Remove the new-style rows so we can backfill. TagToRepositoryTag.delete().execute() Tag.delete().execute() if clear_all_rows: TagManifestLabelMap.delete().execute() ManifestLabel.delete().execute() ManifestBlob.delete().execute() ManifestLegacyImage.delete().execute() TagManifestToManifest.delete().execute() Manifest.delete().execute() found_dead_tag = False for repository_tag in list(RepositoryTag.select()): # Backfill the tag. assert backfill_tag(repository_tag) # Ensure if we try again, the backfill is skipped. assert not backfill_tag(repository_tag) # Ensure that we now have the expected tag rows. tag_to_repo_tag = TagToRepositoryTag.get(repository_tag=repository_tag) tag = tag_to_repo_tag.tag assert tag.name == repository_tag.name assert tag.repository == repository_tag.repository assert not tag.hidden assert tag.reversion == repository_tag.reversion if repository_tag.lifetime_start_ts is None: assert tag.lifetime_start_ms is None else: assert tag.lifetime_start_ms == (repository_tag.lifetime_start_ts * 1000) if repository_tag.lifetime_end_ts is None: assert tag.lifetime_end_ms is None else: assert tag.lifetime_end_ms == (repository_tag.lifetime_end_ts * 1000) found_dead_tag = True assert tag.manifest # Ensure that we now have the expected manifest rows. try: tag_manifest = TagManifest.get(tag=repository_tag) except TagManifest.DoesNotExist: continue map_row = TagManifestToManifest.get(tag_manifest=tag_manifest) assert not map_row.broken manifest_row = map_row.manifest assert manifest_row.manifest_bytes == tag_manifest.json_data assert manifest_row.digest == tag_manifest.digest assert manifest_row.repository == tag_manifest.tag.repository assert tag.manifest == map_row.manifest legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image assert tag_manifest.tag.image == legacy_image expected_storages = {tag_manifest.tag.image.storage.id} for parent_image_id in tag_manifest.tag.image.ancestor_id_list(): expected_storages.add(Image.get(id=parent_image_id).storage_id) found_storages = { manifest_blob.blob_id for manifest_blob in ManifestBlob.select().where( ManifestBlob.manifest == manifest_row) } assert expected_storages == found_storages # Ensure the labels were copied over. tmls = list(TagManifestLabel.select().where( TagManifestLabel.annotated == tag_manifest)) expected_labels = {tml.label_id for tml in tmls} found_labels = { m.label_id for m in ManifestLabel.select().where( ManifestLabel.manifest == manifest_row) } assert found_labels == expected_labels # Verify at the repository level. for repository in list(Repository.select()): tags = RepositoryTag.select().where( RepositoryTag.repository == repository, RepositoryTag.hidden == False) oci_tags = Tag.select().where(Tag.repository == repository) assert len(tags) == len(oci_tags) assert {t.name for t in tags} == {t.name for t in oci_tags} for tag in tags: tag_manifest = TagManifest.get(tag=tag) ttr = TagToRepositoryTag.get(repository_tag=tag) manifest = ttr.tag.manifest assert tag_manifest.json_data == manifest.manifest_bytes assert tag_manifest.digest == manifest.digest assert tag.image == ManifestLegacyImage.get( manifest=manifest).image assert tag.lifetime_start_ts == (ttr.tag.lifetime_start_ms / 1000) if tag.lifetime_end_ts: assert tag.lifetime_end_ts == (ttr.tag.lifetime_end_ms / 1000) else: assert ttr.tag.lifetime_end_ms is None assert found_dead_tag
def lookup_manifest_map_row(tag_manifest): try: TagManifestToManifest.get(tag_manifest=tag_manifest) return True except TagManifestToManifest.DoesNotExist: return False
def _backfill_manifest(tag_manifest): logger.info('Backfilling manifest for tag manifest %s', tag_manifest.id) # Ensure that a mapping row doesn't already exist. If it does, we've been preempted. if lookup_manifest_map_row(tag_manifest): return False # Parse the manifest. If we cannot parse, then we treat the manifest as broken and just emit it # without additional rows or data, as it will eventually not be useful. is_broken = False try: manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode( tag_manifest.json_data), validate=False) except ManifestException: logger.exception('Exception when trying to parse manifest %s', tag_manifest.id) manifest = BrokenManifest(tag_manifest.digest, tag_manifest.json_data) is_broken = True # Lookup the storages for the digests. root_image = tag_manifest.tag.image repository = tag_manifest.tag.repository image_storage_id_map = { root_image.storage.content_checksum: root_image.storage.id } try: parent_images = get_parent_images(repository.namespace_user.username, repository.name, root_image) except DataModelException: logger.exception( 'Exception when trying to load parent images for manifest `%s`', tag_manifest.id) parent_images = {} is_broken = True for parent_image in parent_images: image_storage_id_map[ parent_image.storage.content_checksum] = parent_image.storage.id # Ensure that all the expected blobs have been found. If not, we lookup the blob under the repo # and add its storage ID. If the blob is not found, we mark the manifest as broken. storage_ids = set() try: for blob_digest in manifest.get_blob_digests_for_translation(): if blob_digest in image_storage_id_map: storage_ids.add(image_storage_id_map[blob_digest]) else: logger.debug( 'Blob `%s` not found in images for manifest `%s`; checking repo', blob_digest, tag_manifest.id) try: blob_storage = get_repo_blob_by_digest( repository.namespace_user.username, repository.name, blob_digest) storage_ids.add(blob_storage.id) except BlobDoesNotExist: logger.debug( 'Blob `%s` not found in repo for manifest `%s`', blob_digest, tag_manifest.id) is_broken = True except MalformedSchema1Manifest: logger.warning( 'Found malformed schema 1 manifest during blob backfill') is_broken = True with db_transaction(): # Re-retrieve the tag manifest to ensure it still exists and we're pointing at the correct tag. try: tag_manifest = TagManifest.get(id=tag_manifest.id) except TagManifest.DoesNotExist: return True # Ensure it wasn't already created. if lookup_manifest_map_row(tag_manifest): return False # Check for a pre-existing manifest matching the digest in the repository. This can happen # if we've already created the manifest row (typically for tag reverision). try: manifest_row = Manifest.get(digest=manifest.digest, repository=tag_manifest.tag.repository) except Manifest.DoesNotExist: # Create the new-style rows for the manifest. try: manifest_row = populate_manifest(tag_manifest.tag.repository, manifest, tag_manifest.tag.image, storage_ids) except IntegrityError: # Pre-empted. return False # Create the mapping row. If we find another was created for this tag manifest in the # meantime, then we've been preempted. try: TagManifestToManifest.create(tag_manifest=tag_manifest, manifest=manifest_row, broken=is_broken) except IntegrityError: return False # Backfill any labels on the manifest. _backfill_labels(tag_manifest, manifest_row, repository) return True
def create_manifest_label(manifest_id, key, value, source_type_name, media_type_name=None, adjust_old_model=True): """ Creates a new manifest label on a specific tag manifest. """ if not key: raise InvalidLabelKeyException() # Note that we don't prevent invalid label names coming from the manifest to be stored, as Docker # does not currently prevent them from being put into said manifests. if not validate_label_key(key) and source_type_name != "manifest": raise InvalidLabelKeyException("Key `%s` is invalid" % key) # Find the matching media type. If none specified, we infer. if media_type_name is None: media_type_name = "text/plain" if is_json(value): media_type_name = "application/json" try: media_type_id = Label.media_type.get_id(media_type_name) except MediaType.DoesNotExist: raise InvalidMediaTypeException() source_type_id = Label.source_type.get_id(source_type_name) # Ensure the manifest exists. try: manifest = (Manifest.select(Manifest, Repository).join(Repository).where( Manifest.id == manifest_id).get()) except Manifest.DoesNotExist: return None repository = manifest.repository # TODO: Remove this code once the TagManifest table is gone. tag_manifest = None if adjust_old_model: try: mapping_row = (TagManifestToManifest.select( TagManifestToManifest, TagManifest).join(TagManifest).where( TagManifestToManifest.manifest == manifest).get()) tag_manifest = mapping_row.tag_manifest except TagManifestToManifest.DoesNotExist: tag_manifest = None with db_transaction(): label = Label.create(key=key, value=value, source_type=source_type_id, media_type=media_type_id) manifest_label = ManifestLabel.create(manifest=manifest_id, label=label, repository=repository) # If there exists a mapping to a TagManifest, add the old-style label. # TODO: Remove this code once the TagManifest table is gone. if tag_manifest: tag_manifest_label = TagManifestLabel.create( annotated=tag_manifest, label=label, repository=repository) TagManifestLabelMap.create( manifest_label=manifest_label, tag_manifest_label=tag_manifest_label, label=label, manifest=manifest, tag_manifest=tag_manifest, ) return label