def _backfill_labels(tag_manifest, manifest, repository):
    """Copy the labels of a legacy tag manifest onto ``manifest``.

    For every ``TagManifestLabel`` row annotating ``tag_manifest`` that does
    not yet have a ``TagManifestLabelMap`` row, a ``ManifestLabel`` row is
    created for ``manifest`` in ``repository`` along with the map row that
    links the old and new label rows. A label whose row creation raises
    ``IntegrityError`` is skipped.

    Returns:
        None.
    """
    legacy_labels = list(
        TagManifestLabel.select().where(TagManifestLabel.annotated == tag_manifest)
    )

    for legacy_label in legacy_labels:
        label = legacy_label.label

        # A map row for this legacy label means it was already backfilled.
        try:
            TagManifestLabelMap.get(tag_manifest_label=legacy_label)
            already_mapped = True
        except TagManifestLabelMap.DoesNotExist:
            already_mapped = False

        if already_mapped:
            continue

        # NOTE(review): no transaction is opened here; if the map insert raises
        # IntegrityError after the ManifestLabel insert succeeded, the new
        # ManifestLabel row presumably remains unless the caller wraps this
        # call in a transaction -- confirm.
        try:
            new_label = ManifestLabel.create(
                manifest=manifest,
                label=label,
                repository=repository,
            )
            TagManifestLabelMap.create(
                manifest_label=new_label,
                tag_manifest_label=legacy_label,
                label=label,
                manifest=manifest,
                tag_manifest=legacy_label.annotated,
            )
        except IntegrityError:
            continue
def _check_label_used(label_id):
    """Report whether a label is still referenced by any label row.

    Looks, inside a single ``db_transaction()``, for a ``ManifestLabel`` row
    and then a ``TagManifestLabel`` row whose ``label`` equals ``label_id``.

    Args:
        label_id: ID of the label to look up; must not be ``None``.

    Returns:
        True as soon as one referencing row is found, otherwise False.
    """
    assert label_id is not None

    with db_transaction():
        # Check if the label is referenced by another manifest or tag manifest.
        for referencing_model in (ManifestLabel, TagManifestLabel):
            lookup = referencing_model.select().where(
                referencing_model.label == label_id
            )
            try:
                lookup.get()
            except referencing_model.DoesNotExist:
                continue
            return True

    return False
def _candidates_to_backfill(self):
    """Build the iterator of ``TagManifestLabel`` rows lacking a map row.

    The candidate query left-outer-joins ``TagManifestLabel`` to
    ``TagManifestLabelMap`` and keeps the rows where the map ID is NULL.
    The smallest candidate ID and the largest ``TagManifestLabel`` ID are
    computed up front and handed to ``yield_random_entries`` together with
    the query factory.

    Returns:
        Whatever ``yield_random_entries`` returns for those arguments.
    """

    def missing_tmt_query():
        # Rebuilt on every call so each invocation gets a fresh query object.
        has_no_map_row = TagManifestLabelMap.id >> None
        joined = TagManifestLabel.select().join(TagManifestLabelMap, JOIN.LEFT_OUTER)
        return joined.where(has_no_map_row)

    lowest_candidate_query = (
        TagManifestLabel.select(fn.Min(TagManifestLabel.id))
        .join(TagManifestLabelMap, JOIN.LEFT_OUTER)
        .where(TagManifestLabelMap.id >> None)
    )
    min_id = lowest_candidate_query.scalar()
    max_id = TagManifestLabel.select(fn.Max(TagManifestLabel.id)).scalar()

    # Arguments stay positional; 100 is presumably the batch size -- confirm
    # against yield_random_entries' signature.
    return yield_random_entries(
        missing_tmt_query,
        TagManifestLabel.id,
        100,
        max_id,
        min_id,
    )
def _garbage_collect_legacy_manifest(legacy_manifest_id, context):
    """Delete a legacy ``TagManifest`` row and the rows that hang off it.

    Before deleting anything, the label IDs of all ``TagManifestLabel`` rows
    annotating the manifest are registered on ``context`` via
    ``add_label_id``. Then, inside one ``db_transaction()``, the label map
    rows, the label rows, the ``TagManifestToManifest`` row (if present) and
    finally the ``TagManifest`` row itself are deleted, in that order.

    Args:
        legacy_manifest_id: ID of the ``TagManifest`` row; must not be None.
        context: Object providing ``add_label_id``, ``add_manifest_id`` and
            ``repository``; the manifest's tag must belong to
            ``context.repository``.

    Returns:
        False if no ``TagManifest`` row with that ID exists, True otherwise.
    """
    assert legacy_manifest_id is not None

    # Add the labels to be GCed.
    attached_labels = TagManifestLabel.select().where(
        TagManifestLabel.annotated == legacy_manifest_id
    )
    for attached_label in attached_labels:
        context.add_label_id(attached_label.label_id)

    # Everything below is deleted within a single transaction.
    with db_transaction():
        try:
            tag_manifest = (
                TagManifest.select().where(TagManifest.id == legacy_manifest_id).get()
            )
        except TagManifest.DoesNotExist:
            return False

        assert tag_manifest.id == legacy_manifest_id
        assert tag_manifest.tag.repository_id == context.repository.id

        # Delete any label mapping rows.
        TagManifestLabelMap.delete().where(
            TagManifestLabelMap.tag_manifest == legacy_manifest_id
        ).execute()

        # Delete the label rows.
        TagManifestLabel.delete().where(
            TagManifestLabel.annotated == legacy_manifest_id
        ).execute()

        # Delete the mapping row if it exists, remembering the manifest it
        # pointed at on the context.
        try:
            mapping_row = (
                TagManifestToManifest.select()
                .where(TagManifestToManifest.tag_manifest == tag_manifest)
                .get()
            )
            context.add_manifest_id(mapping_row.manifest_id)
            if mapping_row.delete_instance():
                gc_table_rows_deleted.labels(table="TagManifestToManifest").inc()
        except TagManifestToManifest.DoesNotExist:
            pass

        # Delete the tag manifest.
        if tag_manifest.delete_instance():
            gc_table_rows_deleted.labels(table="TagManifest").inc()

    return True
def missing_tmt_query():
    """Return a query for ``TagManifestLabel`` rows without a map row.

    The query left-outer-joins ``TagManifestLabelMap`` and keeps only the
    rows where the joined map ID is NULL.
    """
    has_no_map_row = TagManifestLabelMap.id >> None
    joined = TagManifestLabel.select().join(TagManifestLabelMap, JOIN.LEFT_OUTER)
    return joined.where(has_no_map_row)
def _get_dangling_labels():
    """Return the set of ``Label`` IDs no ``TagManifestLabel`` row references."""
    all_label_ids = {label.id for label in Label.select()}
    referenced_ids = {tm_label.label_id for tm_label in TagManifestLabel.select()}
    return all_label_ids.difference(referenced_ids)
def test_tagbackfillworker(clear_all_rows, initialized_db):
    """Backfill every ``RepositoryTag`` and verify the rows it produces.

    The new-style ``Tag``/``TagToRepositoryTag`` rows (and, when
    ``clear_all_rows`` is truthy, the manifest-side rows as well) are removed
    first. Each legacy tag is then backfilled and checked against its new
    tag, manifest, legacy image, blob and label rows; afterwards the visible
    tags of every repository are compared with their backfilled counterparts.
    """
    # Remove the new-style rows so we can backfill.
    TagToRepositoryTag.delete().execute()
    Tag.delete().execute()

    if clear_all_rows:
        TagManifestLabelMap.delete().execute()
        ManifestLabel.delete().execute()
        ManifestBlob.delete().execute()
        ManifestLegacyImage.delete().execute()
        TagManifestToManifest.delete().execute()
        Manifest.delete().execute()

    found_dead_tag = False

    for legacy_tag in list(RepositoryTag.select()):
        # Backfill the tag.
        assert backfill_tag(legacy_tag)

        # Ensure if we try again, the backfill is skipped.
        assert not backfill_tag(legacy_tag)

        # Ensure that we now have the expected tag rows.
        new_tag = TagToRepositoryTag.get(repository_tag=legacy_tag).tag
        assert new_tag.name == legacy_tag.name
        assert new_tag.repository == legacy_tag.repository
        assert not new_tag.hidden
        assert new_tag.reversion == legacy_tag.reversion

        if legacy_tag.lifetime_start_ts is not None:
            assert new_tag.lifetime_start_ms == (legacy_tag.lifetime_start_ts * 1000)
        else:
            assert new_tag.lifetime_start_ms is None

        if legacy_tag.lifetime_end_ts is not None:
            assert new_tag.lifetime_end_ms == (legacy_tag.lifetime_end_ts * 1000)
            # At least one tag with an end timestamp must be seen overall.
            found_dead_tag = True
        else:
            assert new_tag.lifetime_end_ms is None

        assert new_tag.manifest

        # Ensure that we now have the expected manifest rows. Tags without a
        # legacy TagManifest row have nothing further to compare.
        try:
            tag_manifest = TagManifest.get(tag=legacy_tag)
        except TagManifest.DoesNotExist:
            continue

        map_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
        assert not map_row.broken

        manifest_row = map_row.manifest
        assert manifest_row.manifest_bytes == tag_manifest.json_data
        assert manifest_row.digest == tag_manifest.digest
        assert manifest_row.repository == tag_manifest.tag.repository

        assert new_tag.manifest == map_row.manifest

        legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
        assert tag_manifest.tag.image == legacy_image

        # The manifest's blobs must be the image's own storage plus the
        # storage of every ancestor image.
        expected_storages = {tag_manifest.tag.image.storage.id}
        expected_storages.update(
            Image.get(id=ancestor_id).storage_id
            for ancestor_id in tag_manifest.tag.image.ancestor_id_list()
        )

        blob_rows = ManifestBlob.select().where(ManifestBlob.manifest == manifest_row)
        found_storages = {blob_row.blob_id for blob_row in blob_rows}
        assert expected_storages == found_storages

        # Ensure the labels were copied over.
        legacy_label_rows = list(
            TagManifestLabel.select().where(TagManifestLabel.annotated == tag_manifest)
        )
        expected_labels = {row.label_id for row in legacy_label_rows}

        new_label_rows = ManifestLabel.select().where(
            ManifestLabel.manifest == manifest_row
        )
        found_labels = {row.label_id for row in new_label_rows}
        assert found_labels == expected_labels

    # Verify at the repository level.
    for repository in list(Repository.select()):
        # `== False` is required: it builds the ORM expression.
        tags = RepositoryTag.select().where(
            RepositoryTag.repository == repository,
            RepositoryTag.hidden == False,  # noqa: E712
        )
        oci_tags = Tag.select().where(Tag.repository == repository)
        assert len(tags) == len(oci_tags)
        assert {t.name for t in tags} == {t.name for t in oci_tags}

        for visible_tag in tags:
            tag_manifest = TagManifest.get(tag=visible_tag)
            ttr = TagToRepositoryTag.get(repository_tag=visible_tag)
            manifest = ttr.tag.manifest

            assert tag_manifest.json_data == manifest.manifest_bytes
            assert tag_manifest.digest == manifest.digest
            assert visible_tag.image == ManifestLegacyImage.get(manifest=manifest).image
            assert visible_tag.lifetime_start_ts == (ttr.tag.lifetime_start_ms / 1000)

            if visible_tag.lifetime_end_ts:
                assert visible_tag.lifetime_end_ts == (ttr.tag.lifetime_end_ms / 1000)
            else:
                assert ttr.tag.lifetime_end_ms is None

    assert found_dead_tag