def _purge_oci_tag(tag, context, allow_non_expired=False):
    """
    Delete an OCI `Tag` row and register its manifest with the GC context.

    Unless `allow_non_expired` is true, the tag must have a `lifetime_end_ms`
    that is not in the future. Returns False when the tag no longer exists or
    its `lifetime_end_ms` changed since it was loaded; otherwise the tag and
    its `TagToRepositoryTag` rows are deleted and None is returned implicitly.
    """
    assert tag.repository_id == context.repository.id

    if not allow_non_expired:
        expires_at_ms = tag.lifetime_end_ms
        assert expires_at_ms is not None
        assert expires_at_ms <= oci_tag.get_epoch_timestamp_ms()

    # The manifest the tag pointed at becomes a GC candidate.
    context.add_manifest_id(tag.manifest_id)

    with db_transaction():
        # Re-read the row through db_for_update so we can detect concurrent changes.
        reload_query = db_for_update(Tag.select().where(Tag.id == tag.id))
        try:
            current = reload_query.get()
        except Tag.DoesNotExist:
            return False

        assert current.id == tag.id
        assert current.repository_id == context.repository.id

        # Someone changed the expiration in the meantime: leave the tag alone.
        if current.lifetime_end_ms != tag.lifetime_end_ms:
            return False

        # Remove the old-model linkage first, then the tag itself.
        TagToRepositoryTag.delete().where(TagToRepositoryTag.tag == tag).execute()
        tag.delete_instance()
def _purge_pre_oci_tag(tag, context, allow_non_expired=False):
    """
    Delete a legacy `RepositoryTag` row and register its dependents for GC.

    Unless `allow_non_expired` is true, the tag must have a `lifetime_end_ts`
    that is not in the future. Returns False when the tag no longer exists or
    its `lifetime_end_ts` changed since it was loaded; otherwise the tag and
    its `TagToRepositoryTag` rows are deleted and None is returned implicitly.
    """
    assert tag.repository_id == context.repository.id

    if not allow_non_expired:
        expires_at_ts = tag.lifetime_end_ts
        assert expires_at_ts is not None
        assert expires_at_ts <= pre_oci_tag.get_epoch_timestamp()

    # Garbage collect the tag's TagManifest, when it has one.
    try:
        legacy_manifest = TagManifest.select().where(TagManifest.tag == tag).get()
        _garbage_collect_legacy_manifest(legacy_manifest.id, context)
    except TagManifest.DoesNotExist:
        pass

    # The legacy image the tag pointed at becomes a GC candidate.
    context.add_legacy_image_id(tag.image_id)

    with db_transaction():
        # Re-read the row through db_for_update so we can detect concurrent changes.
        reload_query = db_for_update(RepositoryTag.select().where(RepositoryTag.id == tag.id))
        try:
            current = reload_query.get()
        except RepositoryTag.DoesNotExist:
            return False

        assert current.id == tag.id
        assert current.repository_id == context.repository.id

        # Someone changed the expiration in the meantime: leave the tag alone.
        if current.lifetime_end_ts != tag.lifetime_end_ts:
            return False

        # Remove the new-model linkage first, then the tag itself.
        TagToRepositoryTag.delete().where(TagToRepositoryTag.repository_tag == current).execute()
        current.delete_instance()
def clear_rows(initialized_db):
    """
    Delete every row of the new-style tag/manifest tables so a backfill can run.
    """
    # The deletes are issued in exactly this order.
    new_style_models = (
        TagToRepositoryTag,
        Tag,
        TagManifestLabelMap,
        ManifestLabel,
        ManifestBlob,
        ManifestLegacyImage,
        TagManifestToManifest,
        Manifest,
    )
    for new_style_model in new_style_models:
        new_style_model.delete().execute()
def _delete_tag(tag, now_ms):
    """
    Expire the given tag by setting its `lifetime_end_ms` to `now_ms`.

    The update only applies while the stored `lifetime_end_ms` still matches
    the value on `tag`. Returns None when no single row was updated, otherwise
    returns `tag` (the passed-in object, which is not refreshed).
    """
    now_ts = int(now_ms // 1000)

    with db_transaction():
        expire_query = Tag.update(lifetime_end_ms=now_ms).where(
            Tag.id == tag.id,
            Tag.lifetime_end_ms == tag.lifetime_end_ms,
        )
        if expire_query.execute() != 1:
            return None

        # TODO: Remove the linkage code once RepositoryTag is gone.
        try:
            mapping_row = (
                TagToRepositoryTag.select(TagToRepositoryTag, RepositoryTag)
                .join(RepositoryTag)
                .where(TagToRepositoryTag.tag == tag)
                .get()
            )
            legacy_tag = mapping_row.repository_tag
            legacy_tag.lifetime_end_ts = now_ts
            legacy_tag.save()
        except TagToRepositoryTag.DoesNotExist:
            # No old-style tag is linked; nothing more to expire.
            pass

        return tag
def set_tag_end_ms(tag, end_ms):
    """
    Sets the end timestamp for a tag.

    Should only be called by change_tag_expiration or tests.

    Returns `(None, False)` when the guarded update did not touch exactly one
    row; otherwise `(tag.lifetime_end_ms, True)`, where the first element is
    read from the passed-in `tag` object (which this function does not modify).
    """
    with db_transaction():
        guarded_update = (
            Tag.update(lifetime_end_ms=end_ms)
            .where(Tag.id == tag)
            .where(Tag.lifetime_end_ms == tag.lifetime_end_ms)
        )
        rows_changed = guarded_update.execute()
        if rows_changed != 1:
            return (None, False)

        # TODO: Remove the linkage code once RepositoryTag is gone.
        try:
            mapping_row = (
                TagToRepositoryTag.select(TagToRepositoryTag, RepositoryTag)
                .join(RepositoryTag)
                .where(TagToRepositoryTag.tag == tag)
                .get()
            )
            legacy_tag = mapping_row.repository_tag

            # The old model stores seconds; keep "no expiration" as None.
            if end_ms is not None:
                legacy_tag.lifetime_end_ts = end_ms // 1000
            else:
                legacy_tag.lifetime_end_ts = None
            legacy_tag.save()
        except TagToRepositoryTag.DoesNotExist:
            pass

        return (tag.lifetime_end_ms, True)
def test_manifest_backfill_broken_tag(clear_rows, initialized_db):
    """
    Tests backfilling a broken tag.
    """
    # Drop the existing tag manifests (labels first) so the tag can be reused.
    for legacy_model in (TagManifestLabel, TagManifest):
        legacy_model.delete().execute()

    # Build a tag whose image lists an ancestor that does not exist.
    repo = model.repository.get_repository("devtable", "gargantuan")
    image_missing_parents = Image.create(
        docker_image_id="foo",
        repository=repo,
        ancestors="/348723847234/",
        storage=ImageStorage.get(),
    )
    broken_tag = RepositoryTag.create(
        repository=repo,
        image=image_missing_parents,
        name="broken",
    )

    # The backfill must report success.
    assert backfill_tag(broken_tag)

    # A manifest row is still produced, even though the source is broken.
    legacy_manifest = TagManifest.get(tag=broken_tag)
    backfilled_manifest = TagManifestToManifest.get(tag_manifest=legacy_manifest).manifest
    assert backfilled_manifest.manifest_bytes == legacy_manifest.json_data

    # The new-style tag is linked to it under the same name.
    oci_tag = TagToRepositoryTag.get(repository_tag=broken_tag).tag
    assert oci_tag.name == "broken"
    assert oci_tag.manifest == backfilled_manifest
def backfill_tag(repositorytag):
    """
    Create the new-style `Tag` (and its mapping row) for a legacy `RepositoryTag`.

    Returns False when a mapping row already exists or the rows could not be
    created because of an `IntegrityError`. Returns True when the tag was
    backfilled, and also when no manifest ID could be found for the tag (in
    which case nothing is created).
    """

    def _seconds_to_ms(timestamp):
        # Preserve None ("not set") instead of multiplying it.
        return timestamp * 1000 if timestamp is not None else None

    logger.info("Backfilling tag %s", repositorytag.id)

    # A mapping row means this tag was already handled.
    if lookup_map_row(repositorytag):
        return False

    # Find the manifest for the RepositoryTag, backfilling as necessary.
    manifest_id = _get_manifest_id(repositorytag)
    if manifest_id is None:
        return True

    start_ms = _seconds_to_ms(repositorytag.lifetime_start_ts)
    end_ms = _seconds_to_ms(repositorytag.lifetime_end_ts)

    with db_transaction():
        # Check again inside the transaction before writing.
        if lookup_map_row(repositorytag):
            return False

        try:
            new_tag = Tag.create(
                name=repositorytag.name,
                repository=repositorytag.repository,
                lifetime_start_ms=start_ms,
                lifetime_end_ms=end_ms,
                reversion=repositorytag.reversion,
                manifest=manifest_id,
                tag_kind=Tag.tag_kind.get_id("tag"),
            )
            TagToRepositoryTag.create(
                tag=new_tag,
                repository_tag=repositorytag,
                repository=repositorytag.repository,
            )
        except IntegrityError:
            logger.exception("Could not create tag for repo tag `%s`", repositorytag.id)
            return False

    logger.info("Backfilled tag %s", repositorytag.id)
    return True
def verify_backfill(namespace_name):
    """
    Assert that every non-hidden `RepositoryTag` in a namespace was backfilled correctly.

    For each tag, the linked new-style `Tag` is compared field by field (name,
    repository, reversion, lifetime), then the manifest digest/bytes are
    compared when a `TagManifest` exists, and finally the manifest's legacy
    image is compared with the tag's image. Any mismatch raises
    `AssertionError`.
    """
    logger.info("Checking namespace %s", namespace_name)

    namespace_user = model.user.get_namespace_user(namespace_name)
    assert namespace_user

    # `== False` is intentional: it builds a query expression, not a Python bool.
    visible_tags_query = (
        RepositoryTag.select()
        .join(Repository)
        .where(Repository.namespace_user == namespace_user)
        .where(RepositoryTag.hidden == False)
    )
    repo_tags = list(visible_tags_query)
    total = len(repo_tags)
    logger.info("Found %s tags", total)

    for position, repo_tag in enumerate(repo_tags, start=1):
        logger.info(
            "Checking tag %s under repository %s (%s/%s)",
            repo_tag.name,
            repo_tag.repository.name,
            position,
            total,
        )

        tag = TagToRepositoryTag.get(repository_tag=repo_tag).tag

        # Basic identity of the new-style tag.
        assert not tag.hidden
        assert tag.repository == repo_tag.repository
        assert tag.name == repo_tag.name, _vs(tag.name, repo_tag.name)
        assert tag.repository == repo_tag.repository, _vs(
            tag.repository_id, repo_tag.repository_id
        )
        assert tag.reversion == repo_tag.reversion, _vs(tag.reversion, repo_tag.reversion)

        # Lifetimes: the new model is in milliseconds, the old one in seconds.
        start_check = int(tag.lifetime_start_ms / 1000) == repo_tag.lifetime_start_ts
        assert start_check, _vs(tag.lifetime_start_ms, repo_tag.lifetime_start_ts)

        if repo_tag.lifetime_end_ts is None:
            assert tag.lifetime_end_ms is None
        else:
            end_check = int(tag.lifetime_end_ms / 1000) == repo_tag.lifetime_end_ts
            assert end_check, _vs(tag.lifetime_end_ms, repo_tag.lifetime_end_ts)

        # Manifest contents, when the old model has a manifest for the tag.
        try:
            tag_manifest = tag.manifest
            repo_tag_manifest = TagManifest.get(tag=repo_tag)

            digest_check = tag_manifest.digest == repo_tag_manifest.digest
            assert digest_check, _vs(tag_manifest.digest, repo_tag_manifest.digest)

            bytes_check = tag_manifest.manifest_bytes == repo_tag_manifest.json_data
            assert bytes_check, _vs(tag_manifest.manifest_bytes, repo_tag_manifest.json_data)
        except TagManifest.DoesNotExist:
            logger.info("No tag manifest found for repository tag %s", repo_tag.id)

        # Legacy image linkage of the new manifest.
        mli = ManifestLegacyImage.get(manifest=tag_manifest)
        assert mli.repository == repo_tag.repository

        manifest_legacy_image = mli.image
        assert manifest_legacy_image == repo_tag.image, _vs(
            manifest_legacy_image.id, repo_tag.image_id
        )
def associate_generated_tag_manifest_with_tag(tag, manifest, storage_id_map):
    """
    Link a generated manifest to a legacy tag, creating the new-style `Tag` if needed.

    The manifest row is obtained from `_populate_manifest_and_blobs`. If no
    `Tag` is linked to `tag` through `TagToRepositoryTag`, one is created that
    mirrors the legacy tag's name, reversion flag and lifetime (converted from
    seconds to milliseconds), along with the mapping row.

    Returns whatever `_associate_manifest(tag, oci_manifest)` returns.
    """
    oci_manifest = _populate_manifest_and_blobs(tag.repository, manifest, storage_id_map)

    with db_transaction():
        try:
            # Only the existence of a linked new-style tag matters here.
            (
                Tag.select()
                .join(TagToRepositoryTag)
                .where(TagToRepositoryTag.repository_tag == tag)
            ).get()
        except Tag.DoesNotExist:
            # Test against None rather than truthiness: an end timestamp of 0 is
            # still an end time and must not become "no end time" on the new tag.
            lifetime_end_ms = (
                tag.lifetime_end_ts * 1000 if tag.lifetime_end_ts is not None else None
            )
            oci_tag = Tag.create(
                repository=tag.repository,
                manifest=oci_manifest,
                name=tag.name,
                reversion=tag.reversion,
                lifetime_start_ms=tag.lifetime_start_ts * 1000,
                lifetime_end_ms=lifetime_end_ms,
                tag_kind=Tag.tag_kind.get_id("tag"),
            )
            TagToRepositoryTag.create(tag=oci_tag, repository_tag=tag, repository=tag.repository)

        return _associate_manifest(tag, oci_manifest)
def upgrade_progress():
    """
    Report the tag backfill progress as a JSON response.

    `total_tags` counts non-hidden `RepositoryTag` rows and the upgraded count
    is the number of `TagToRepositoryTag` rows. With no tags at all, progress
    is reported as complete (1.0).
    """
    # `== False` is intentional: it builds a query expression, not a Python bool.
    total_tags = RepositoryTag.select().where(RepositoryTag.hidden == False).count()

    if total_tags:
        upgraded_tags = TagToRepositoryTag.select().count()
        payload = {
            "progress": float(upgraded_tags) / total_tags,
            "tags_remaining": total_tags - upgraded_tags,
            "total_tags": total_tags,
        }
    else:
        # Nothing to upgrade; avoid dividing by zero.
        payload = {
            "progress": 1.0,
            "tags_remaining": 0,
            "total_tags": 0,
        }

    return jsonify(payload)
def test_retarget_tag(initialized_db):
    """
    Retargets `latest` back to a manifest from its history and checks both data models.
    """
    repo = get_repository("devtable", "history")

    history, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name="latest")
    assert len(history) == 2
    newest, older = history[0], history[1]
    assert newest.lifetime_end_ms is None
    assert older.lifetime_end_ms is not None

    # Revert back to the original manifest.
    created = retarget_tag(
        "latest",
        newest.manifest,
        is_reversion=True,
        now_ms=older.lifetime_end_ms + 10000,
    )
    assert created.lifetime_end_ms is None
    assert created.reversion
    assert created.name == "latest"
    assert created.manifest == newest.manifest

    # The history gains one entry, and only its head is still alive.
    history, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name="latest")
    assert len(history) == 3
    assert history[0].lifetime_end_ms is None
    assert history[1].lifetime_end_ms is not None
    assert history[2].lifetime_end_ms is not None

    assert history[0] == created

    # The old-style tables must mirror the new tag.
    repository_tag = TagToRepositoryTag.get(tag=created).repository_tag
    assert repository_tag.lifetime_start_ts == int(created.lifetime_start_ms / 1000)

    legacy_manifest = TagManifest.get(tag=repository_tag)
    mapped_manifest = TagManifestToManifest.get(tag_manifest=legacy_manifest).manifest
    assert mapped_manifest == created.manifest
def delete_tag(namespace_name, repository_name, tag_name, now_ms=None):
    """
    Expire the live legacy tag with the given name, and its linked OCI tag if any.

    `now_ms` defaults to the current epoch time in milliseconds. Raises
    `DataModelException` when no live tag with that name exists in the
    repository. Returns the updated `RepositoryTag` row.
    """
    now_ms = now_ms or get_epoch_timestamp_ms()
    now_ts = int(now_ms / 1000)

    with db_transaction():
        try:
            candidates = (
                RepositoryTag.select(RepositoryTag, Repository)
                .join(Repository)
                .join(Namespace, on=(Repository.namespace_user == Namespace.id))
                .where(
                    Repository.name == repository_name,
                    Namespace.username == namespace_name,
                    RepositoryTag.name == tag_name,
                )
            )
            found = db_for_update(_tag_alive(candidates, now_ts)).get()
        except RepositoryTag.DoesNotExist:
            msg = "Invalid repository tag '%s' on repository '%s/%s'" % (
                tag_name,
                namespace_name,
                repository_name,
            )
            raise DataModelException(msg)

        # Expire the old-style tag.
        found.lifetime_end_ts = now_ts
        found.save()

        # Expire the linked new-style tag, when one exists.
        try:
            mapping_query = TagToRepositoryTag.select().where(
                TagToRepositoryTag.repository_tag == found
            )
            linked_oci_tag = db_for_update(mapping_query).get().tag
            linked_oci_tag.lifetime_end_ms = now_ms
            linked_oci_tag.save()
        except TagToRepositoryTag.DoesNotExist:
            pass

        return found
def retarget_tag(tag_name, manifest_id, is_reversion=False, now_ms=None, adjust_old_model=True):
    """
    Creates or updates a tag with the specified name to point to the given manifest under its
    repository.

    If this action is a reversion to a previous manifest, is_reversion should be set to True.
    `now_ms` defaults to the current epoch time in milliseconds. When `adjust_old_model` is
    true and the manifest is a schema 1 manifest with a legacy image, matching `RepositoryTag`,
    `TagManifest` and mapping rows are created as well.

    Returns the newly created tag row or None on error. The error cases are: the manifest does
    not exist, a schema 1 manifest is malformed or names a different tag, or the existing tag
    could not be marked as expired.
    """
    try:
        manifest = (
            Manifest.select(Manifest, MediaType)
            .join(MediaType)
            .where(Manifest.id == manifest_id)
            .get()
        )
    except Manifest.DoesNotExist:
        return None

    # CHECK: Make sure that we are not mistargeting a schema 1 manifest to a tag with a different
    # name.
    if manifest.media_type.name in DOCKER_SCHEMA1_CONTENT_TYPES:
        try:
            parsed = DockerSchema1Manifest(
                Bytes.for_string_or_unicode(manifest.manifest_bytes), validate=False
            )
            if parsed.tag != tag_name:
                logger.error(
                    "Tried to re-target schema1 manifest with tag `%s` to tag `%s`",
                    parsed.tag,
                    tag_name,
                )
                return None
        except MalformedSchema1Manifest:
            logger.exception("Could not parse schema1 manifest")
            return None

    legacy_image = get_legacy_image_for_manifest(manifest)
    now_ms = now_ms or get_epoch_timestamp_ms()
    now_ts = int(now_ms / 1000)

    with db_transaction():
        # Lookup an existing tag in the repository with the same name and, if present, mark it
        # as expired.
        existing_tag = get_tag(manifest.repository_id, tag_name)
        if existing_tag is not None:
            _, okay = set_tag_end_ms(existing_tag, now_ms)

            # TODO: should we retry here and/or use a for-update?
            if not okay:
                return None

        # Create a new tag pointing to the manifest with a lifetime start of now.
        created = Tag.create(
            name=tag_name,
            repository=manifest.repository_id,
            lifetime_start_ms=now_ms,
            reversion=is_reversion,
            manifest=manifest,
            tag_kind=Tag.tag_kind.get_id("tag"),
        )

        # TODO: Remove the linkage code once RepositoryTag is gone.
        # If this is a schema 1 manifest, then add a TagManifest linkage to it. Otherwise, it will
        # only be pullable via the new OCI model.
        if adjust_old_model:
            if (
                manifest.media_type.name in DOCKER_SCHEMA1_CONTENT_TYPES
                and legacy_image is not None
            ):
                old_style_tag = RepositoryTag.create(
                    repository=manifest.repository_id,
                    image=legacy_image,
                    name=tag_name,
                    lifetime_start_ts=now_ts,
                    reversion=is_reversion,
                )
                TagToRepositoryTag.create(
                    tag=created,
                    repository_tag=old_style_tag,
                    repository=manifest.repository_id,
                )

                tag_manifest = TagManifest.create(
                    tag=old_style_tag,
                    digest=manifest.digest,
                    json_data=manifest.manifest_bytes,
                )
                TagManifestToManifest.create(
                    tag_manifest=tag_manifest,
                    manifest=manifest,
                    repository=manifest.repository_id,
                )

        return created
def assert_gc_integrity(expect_storage_removed=True, check_oci_tags=True):
    """
    Generator-based check that a GC run left the database and storage consistent.

    Before the `yield` it registers an image cleanup callback and records the
    dangling storage/label/manifest counts. After the `yield` it asserts that
    those counts are unchanged, that every image reported to the callback is
    gone (and its storage too, unless still referenced), that CAS content is
    readable from the preferred storage location, that no OCI tag lacks a
    mapping row (when `check_oci_tags` is true), and that each tag's manifest
    references exactly the blobs recorded for it.
    """
    # Collect every image handed to the cleanup callback.
    removed_image_storages = []
    model.config.register_image_cleanup_callback(removed_image_storages.extend)

    # Baseline of dangling rows before the caller's code runs.
    storages_before = _get_dangling_storage_count()
    labels_before = _get_dangling_label_count()
    manifests_before = _get_dangling_manifest_count()

    yield

    # The number of dangling storages, labels and manifests must be unchanged.
    storages_after = _get_dangling_storage_count()
    assert storages_after == storages_before

    labels_after = _get_dangling_label_count()
    assert labels_after == labels_before, _get_dangling_labels()

    manifests_after = _get_dangling_manifest_count()
    assert manifests_after == manifests_before

    # Every image reported as removed must really be gone from the database.
    for removed in removed_image_storages:
        with pytest.raises(Image.DoesNotExist):
            Image.get(id=removed.id)

        # A storage may only disappear when no image and no manifest blob still uses it.
        still_referenced = (
            Image.select().where(Image.storage == removed.storage_id).count() != 0
            or ManifestBlob.select().where(ManifestBlob.blob == removed.storage_id).count() != 0
        )
        if not still_referenced:
            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(id=removed.storage_id)

            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(uuid=removed.storage.uuid)

    # All CAS content must be readable from the preferred storage location.
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        if storage_row.cas_path:
            cas_blob_path = storage.blob_path(storage_row.content_checksum)
            storage.get_content({preferred}, cas_blob_path)

    for blob_row in ApprBlob.select():
        appr_blob_path = storage.blob_path(blob_row.digest)
        storage.get_content({preferred}, appr_blob_path)

    # No OCI tag may exist without a mapping row pointing at it.
    if check_oci_tags:
        all_oci_tag_ids = {oci_row.id for oci_row in Tag.select()}
        mapped_oci_tag_ids = {mapping.tag_id for mapping in TagToRepositoryTag.select()}
        assert not all_oci_tag_ids - mapped_oci_tag_ids

    # Each tag's manifest must reference exactly the blobs stored for it.
    for manifest in {tag_row.manifest for tag_row in Tag.select()}:
        blob_rows = ManifestBlob.select().where(ManifestBlob.manifest == manifest)
        found_blobs = {blob_row.blob.content_checksum for blob_row in blob_rows}

        parsed = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(manifest.manifest_bytes),
            manifest.media_type.name,
        )
        assert set(parsed.local_blob_digests) == found_blobs
def create_or_update_tag_for_repo(
    repository_id, tag_name, tag_docker_image_id, reversion=False, oci_manifest=None, now_ms=None
):
    """
    Point `tag_name` in a repository at the image with the given docker image ID.

    Any live tag with that name (and its linked OCI tag) is expired at `now_ms`,
    which defaults to the current epoch time in milliseconds. A new
    `RepositoryTag` is then created; when `oci_manifest` is given, a matching
    `Tag` and mapping row are created too. Returns the new `RepositoryTag`.

    Raises `StaleTagException` on an `IntegrityError` while expiring the old
    tag, `DataModelException` when the image does not exist in the repository,
    and `TagAlreadyCreatedException` on an `IntegrityError` while creating the
    new rows.
    """
    now_ms = now_ms or get_epoch_timestamp_ms()
    now_ts = int(now_ms / 1000)

    with db_transaction():
        # Expire the currently live tag of this name, if there is one.
        try:
            same_name_tags = RepositoryTag.select().where(
                RepositoryTag.repository == repository_id,
                RepositoryTag.name == tag_name,
            )
            live_tag = db_for_update(_tag_alive(same_name_tags, now_ts)).get()
            live_tag.lifetime_end_ts = now_ts
            live_tag.save()

            # Expire the OCI tag linked to it, when present.
            try:
                linked_tags = (
                    Tag.select()
                    .join(TagToRepositoryTag)
                    .where(TagToRepositoryTag.repository_tag == live_tag)
                )
                live_oci_tag = db_for_update(linked_tags).get()
                live_oci_tag.lifetime_end_ms = now_ms
                live_oci_tag.save()
            except Tag.DoesNotExist:
                pass
        except RepositoryTag.DoesNotExist:
            pass
        except IntegrityError:
            msg = "Tag with name %s was stale when we tried to update it; Please retry the push"
            raise StaleTagException(msg % tag_name)

        # Resolve the image the new tag should point at.
        try:
            image_obj = Image.get(
                Image.docker_image_id == tag_docker_image_id,
                Image.repository == repository_id,
            )
        except Image.DoesNotExist:
            raise DataModelException("Invalid image with id: %s" % tag_docker_image_id)

        # Create the new tag rows.
        try:
            created = RepositoryTag.create(
                repository=repository_id,
                image=image_obj,
                name=tag_name,
                lifetime_start_ts=now_ts,
                reversion=reversion,
            )

            if oci_manifest:
                # Mirror the tag in the OCI model and link both rows.
                new_oci_tag = Tag.create(
                    repository=repository_id,
                    manifest=oci_manifest,
                    name=tag_name,
                    lifetime_start_ms=now_ms,
                    reversion=reversion,
                    tag_kind=Tag.tag_kind.get_id("tag"),
                )
                TagToRepositoryTag.create(
                    tag=new_oci_tag,
                    repository_tag=created,
                    repository=repository_id,
                )

            return created
        except IntegrityError:
            msg = "Tag with name %s and lifetime start %s already exists"
            raise TagAlreadyCreatedException(msg % (tag_name, now_ts))
def test_tagbackfillworker(clear_all_rows, initialized_db):
    """
    Backfills every RepositoryTag and checks the resulting new-style rows.

    With `clear_all_rows` set, the manifest-related tables are emptied as well
    before the backfill runs.
    """
    # Remove the new-style tag rows so there is something to backfill.
    TagToRepositoryTag.delete().execute()
    Tag.delete().execute()

    if clear_all_rows:
        manifest_models = (
            TagManifestLabelMap,
            ManifestLabel,
            ManifestBlob,
            ManifestLegacyImage,
            TagManifestToManifest,
            Manifest,
        )
        for manifest_model in manifest_models:
            manifest_model.delete().execute()

    found_dead_tag = False

    for repository_tag in list(RepositoryTag.select()):
        # The first backfill succeeds, a repeated one is skipped.
        assert backfill_tag(repository_tag)
        assert not backfill_tag(repository_tag)

        # The new tag row mirrors the old one.
        oci_tag = TagToRepositoryTag.get(repository_tag=repository_tag).tag
        assert oci_tag.name == repository_tag.name
        assert oci_tag.repository == repository_tag.repository
        assert not oci_tag.hidden
        assert oci_tag.reversion == repository_tag.reversion

        start_ts = repository_tag.lifetime_start_ts
        if start_ts is None:
            assert oci_tag.lifetime_start_ms is None
        else:
            assert oci_tag.lifetime_start_ms == (start_ts * 1000)

        end_ts = repository_tag.lifetime_end_ts
        if end_ts is None:
            assert oci_tag.lifetime_end_ms is None
        else:
            assert oci_tag.lifetime_end_ms == (end_ts * 1000)
            found_dead_tag = True

        assert oci_tag.manifest

        # Manifest rows can only be compared when the old model has a manifest.
        try:
            tag_manifest = TagManifest.get(tag=repository_tag)
        except TagManifest.DoesNotExist:
            continue

        map_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
        assert not map_row.broken

        manifest_row = map_row.manifest
        assert manifest_row.manifest_bytes == tag_manifest.json_data
        assert manifest_row.digest == tag_manifest.digest
        assert manifest_row.repository == tag_manifest.tag.repository
        assert oci_tag.manifest == map_row.manifest

        legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
        assert tag_manifest.tag.image == legacy_image

        # The manifest's blobs are the storages of the image and all its ancestors.
        expected_storages = {tag_manifest.tag.image.storage.id}
        for parent_image_id in tag_manifest.tag.image.ancestor_id_list():
            expected_storages.add(Image.get(id=parent_image_id).storage_id)

        blob_rows = ManifestBlob.select().where(ManifestBlob.manifest == manifest_row)
        found_storages = {blob_row.blob_id for blob_row in blob_rows}
        assert expected_storages == found_storages

        # Labels must have been copied over.
        old_label_rows = list(
            TagManifestLabel.select().where(TagManifestLabel.annotated == tag_manifest)
        )
        expected_labels = {old_label_row.label_id for old_label_row in old_label_rows}

        new_label_rows = ManifestLabel.select().where(ManifestLabel.manifest == manifest_row)
        found_labels = {new_label_row.label_id for new_label_row in new_label_rows}
        assert found_labels == expected_labels

    # Verify at the repository level.
    for repository in list(Repository.select()):
        # `== False` is intentional: it builds a query expression, not a Python bool.
        tags = RepositoryTag.select().where(
            RepositoryTag.repository == repository,
            RepositoryTag.hidden == False,
        )
        oci_tags = Tag.select().where(Tag.repository == repository)
        assert len(tags) == len(oci_tags)
        assert {t.name for t in tags} == {t.name for t in oci_tags}

        for tag in tags:
            tag_manifest = TagManifest.get(tag=tag)
            ttr = TagToRepositoryTag.get(repository_tag=tag)
            manifest = ttr.tag.manifest

            assert tag_manifest.json_data == manifest.manifest_bytes
            assert tag_manifest.digest == manifest.digest
            assert tag.image == ManifestLegacyImage.get(manifest=manifest).image
            assert tag.lifetime_start_ts == (ttr.tag.lifetime_start_ms / 1000)

            if tag.lifetime_end_ts:
                assert tag.lifetime_end_ts == (ttr.tag.lifetime_end_ms / 1000)
            else:
                assert ttr.tag.lifetime_end_ms is None

    # The fixture data must contain at least one expired tag.
    assert found_dead_tag
def lookup_map_row(repositorytag):
    """
    Return True when a `TagToRepositoryTag` row exists for the given repository tag.
    """
    try:
        TagToRepositoryTag.get(repository_tag=repositorytag)
    except TagToRepositoryTag.DoesNotExist:
        return False
    else:
        return True